/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module.  Where it is exported is outside
 * this part of the file. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/* Our very own off_t-like type, 64-bit if possible.
 * Copied from Objects/fileobject.c.
 *
 * Without large file support plain off_t is used.  With it, the first
 * of off_t and fpos_t that is at least 8 bytes wide is chosen, and the
 * build is stopped if neither qualifies.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character buffer of the Python string object `v`.  The argument is
 * parenthesized so that the cast applies to the whole expression. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* True when `v` is exactly a BZ2File instance (its type pointer is
 * compared directly against BZ2File_Type). */
#define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)
45
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* libbz2 compatibility layer, keyed on whether bzlib.h defines
 * BZ_CONFIG_ERROR.
 *
 * BZS_TOTAL_OUT(bzs) yields the output byte count of the bz_stream that
 * `bzs` points to.  The argument is parenthesized in every variant so
 * that arbitrary pointer expressions can be passed.
 */
#ifdef BZ_CONFIG_ERROR

/* The counter is split into two 32-bit halves here; join them in the
 * widest integer type available, or fall back to the low half only. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Map the BZ2_-prefixed names used throughout this file onto the
 * unprefixed entry points (presumably an older libbz2 API). */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking.  With thread support the object's `lock` member
 * is acquired (blocking) and released; without it both macros expand to
 * nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0   /* No newline seen, yet */
#define NEWLINE_CR      1   /* \r newline seen */
#define NEWLINE_LF      2   /* \n newline seen */
#define NEWLINE_CRLF    4   /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
105 int f_softspace; /* Flag used by 'print' command */
106
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000110
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000111 BZFILE *fp;
112 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000113 Py_off_t pos;
114 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000115#ifdef WITH_THREAD
116 PyThread_type_lock lock;
117#endif
118} BZ2FileObject;
119
120typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2CompObject;
128
129typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134#ifdef WITH_THREAD
135 PyThread_type_lock lock;
136#endif
137} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* Growth steps used when enlarging read buffers. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the size a buffer of `currentsize` bytes should grow to:
 *   - up to SMALLCHUNK, add SMALLCHUNK;
 *   - above that and up to BIGCHUNK, double;
 *   - beyond BIGCHUNK, add BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize <= SMALLCHUNK)
        return currentsize + SMALLCHUNK;
    if (currentsize <= BIGCHUNK)
        return currentsize + currentsize;
    return currentsize + BIGCHUNK;
}
226
227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000238 int newlinetypes = f->f_newlinetypes;
239 int skipnextlf = f->f_skipnextlf;
240 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000241
242 total_v_size = n > 0 ? n : 100;
243 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
244 if (v == NULL)
245 return NULL;
246
247 buf = BUF(v);
248 end = buf + total_v_size;
249
250 for (;;) {
251 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (univ_newline) {
253 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000254 BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000256 if (bzerror != BZ_OK || buf == end)
257 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000261 /* Seeing a \n here with
262 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000263 * saw a \r before.
264 */
265 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000266 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000267 &c, 1);
268 if (bzerror != BZ_OK)
269 break;
270 } else {
271 newlinetypes |= NEWLINE_CR;
272 }
273 }
274 if (c == '\r') {
275 skipnextlf = 1;
276 c = '\n';
277 } else if ( c == '\n')
278 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (c == '\n') break;
281 }
282 if (bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000285 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000286 BZ2_bzRead(&bzerror, f->fp, &c, 1);
287 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 *buf++ = c;
289 } while (bzerror == BZ_OK && c != '\n' && buf != end);
290 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000291 f->f_newlinetypes = newlinetypes;
292 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000293 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000294 f->size = f->pos;
295 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000296 break;
297 } else if (bzerror != BZ_OK) {
298 Util_CatchBZ2Error(bzerror);
299 Py_DECREF(v);
300 return NULL;
301 }
302 if (c == '\n')
303 break;
304 /* Must be because buf == end */
305 if (n > 0)
306 break;
307 used_v_size = total_v_size;
308 increment = total_v_size >> 2; /* mild exponential growth */
309 total_v_size += increment;
310 if (total_v_size > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "line is longer than a Python string can hold");
313 Py_DECREF(v);
314 return NULL;
315 }
316 if (_PyString_Resize(&v, total_v_size) < 0)
317 return NULL;
318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
320 }
321
322 used_v_size = buf - BUF(v);
323 if (used_v_size != total_v_size)
324 _PyString_Resize(&v, used_v_size);
325 return v;
326}
327
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000328/* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
330size_t
331Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000332 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000333{
334 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000335 int newlinetypes, skipnextlf;
336
337 assert(buf != NULL);
338 assert(stream != NULL);
339
340 if (!f->f_univ_newline)
341 return BZ2_bzRead(bzerror, stream, buf, n);
342
343 newlinetypes = f->f_newlinetypes;
344 skipnextlf = f->f_skipnextlf;
345
346 /* Invariant: n is the number of bytes remaining to be filled
347 * in the buffer.
348 */
349 while (n) {
350 size_t nread;
351 int shortread;
352 char *src = dst;
353
354 nread = BZ2_bzRead(bzerror, stream, dst, n);
355 assert(nread <= n);
356 n -= nread; /* assuming 1 byte out for each in; will adjust */
357 shortread = n != 0; /* true iff EOF or error */
358 while (nread--) {
359 char c = *src++;
360 if (c == '\r') {
361 /* Save as LF and set flag to skip next LF. */
362 *dst++ = '\n';
363 skipnextlf = 1;
364 }
365 else if (skipnextlf && c == '\n') {
366 /* Skip LF, and remember we saw CR LF. */
367 skipnextlf = 0;
368 newlinetypes |= NEWLINE_CRLF;
369 ++n;
370 }
371 else {
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
375 */
376 if (c == '\n')
377 newlinetypes |= NEWLINE_LF;
378 else if (skipnextlf)
379 newlinetypes |= NEWLINE_CR;
380 *dst++ = c;
381 skipnextlf = 0;
382 }
383 }
384 if (shortread) {
385 /* If this is EOF, update type flags. */
386 if (skipnextlf && *bzerror == BZ_STREAM_END)
387 newlinetypes |= NEWLINE_CR;
388 break;
389 }
390 }
391 f->f_newlinetypes = newlinetypes;
392 f->f_skipnextlf = skipnextlf;
393 return dst - buf;
394}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000395
396/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000398Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000400 if (f->f_buf != NULL) {
401 PyMem_Free(f->f_buf);
402 f->f_buf = NULL;
403 }
404}
405
406/* This is a hacked version of Python's fileobject.c:readahead(). */
407static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000408Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409{
410 int chunksize;
411 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412
413 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000414 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000415 return 0;
416 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000417 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000418 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000420 f->f_bufptr = f->f_buf;
421 f->f_bufend = f->f_buf;
422 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000423 }
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000699 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000705 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000722 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000726 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000738 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
781PyDoc_STRVAR(BZ2File_write__doc__,
782"write(data) -> None\n\
783\n\
784Write the 'data' string to file. Note that due to buffering, close() may\n\
785be needed before the file on disk reflects the data written.\n\
786");
787
788/* This is a hacked version of Python's fileobject.c:file_write(). */
789static PyObject *
790BZ2File_write(BZ2FileObject *self, PyObject *args)
791{
792 PyObject *ret = NULL;
793 char *buf;
794 int len;
795 int bzerror;
796
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000797 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000798 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000799
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000800 ACQUIRE_LOCK(self);
801 switch (self->mode) {
802 case MODE_WRITE:
803 break;
Tim Peterse3228092002-11-09 04:21:44 +0000804
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 case MODE_CLOSED:
806 PyErr_SetString(PyExc_ValueError,
807 "I/O operation on closed file");
808 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000809
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810 default:
811 PyErr_SetString(PyExc_IOError,
812 "file is not ready for writing");
813 goto cleanup;;
814 }
815
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000816 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817
818 Py_BEGIN_ALLOW_THREADS
819 BZ2_bzWrite (&bzerror, self->fp, buf, len);
820 self->pos += len;
821 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000822
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000823 if (bzerror != BZ_OK) {
824 Util_CatchBZ2Error(bzerror);
825 goto cleanup;
826 }
Tim Peterse3228092002-11-09 04:21:44 +0000827
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000828 Py_INCREF(Py_None);
829 ret = Py_None;
830
831cleanup:
832 RELEASE_LOCK(self);
833 return ret;
834}
835
836PyDoc_STRVAR(BZ2File_writelines__doc__,
837"writelines(sequence_of_strings) -> None\n\
838\n\
839Write the sequence of strings to the file. Note that newlines are not\n\
840added. The sequence can be any iterable object producing strings. This is\n\
841equivalent to calling write() for each string.\n\
842");
843
844/* This is a hacked version of Python's fileobject.c:file_writelines(). */
845static PyObject *
846BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
847{
848#define CHUNKSIZE 1000
849 PyObject *list = NULL;
850 PyObject *iter = NULL;
851 PyObject *ret = NULL;
852 PyObject *line;
853 int i, j, index, len, islist;
854 int bzerror;
855
856 ACQUIRE_LOCK(self);
857 islist = PyList_Check(seq);
858 if (!islist) {
859 iter = PyObject_GetIter(seq);
860 if (iter == NULL) {
861 PyErr_SetString(PyExc_TypeError,
862 "writelines() requires an iterable argument");
863 goto error;
864 }
865 list = PyList_New(CHUNKSIZE);
866 if (list == NULL)
867 goto error;
868 }
869
870 /* Strategy: slurp CHUNKSIZE lines into a private list,
871 checking that they are all strings, then write that list
872 without holding the interpreter lock, then come back for more. */
873 for (index = 0; ; index += CHUNKSIZE) {
874 if (islist) {
875 Py_XDECREF(list);
876 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
877 if (list == NULL)
878 goto error;
879 j = PyList_GET_SIZE(list);
880 }
881 else {
882 for (j = 0; j < CHUNKSIZE; j++) {
883 line = PyIter_Next(iter);
884 if (line == NULL) {
885 if (PyErr_Occurred())
886 goto error;
887 break;
888 }
889 PyList_SetItem(list, j, line);
890 }
891 }
892 if (j == 0)
893 break;
894
895 /* Check that all entries are indeed strings. If not,
896 apply the same rules as for file.write() and
897 convert the rets to strings. This is slow, but
898 seems to be the only way since all conversion APIs
899 could potentially execute Python code. */
900 for (i = 0; i < j; i++) {
901 PyObject *v = PyList_GET_ITEM(list, i);
902 if (!PyString_Check(v)) {
903 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000904 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000905 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
906 PyErr_SetString(PyExc_TypeError,
907 "writelines() "
908 "argument must be "
909 "a sequence of "
910 "strings");
911 goto error;
912 }
913 line = PyString_FromStringAndSize(buffer,
914 len);
915 if (line == NULL)
916 goto error;
917 Py_DECREF(v);
918 PyList_SET_ITEM(list, i, line);
919 }
920 }
921
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000922 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000923
924 /* Since we are releasing the global lock, the
925 following code may *not* execute Python code. */
926 Py_BEGIN_ALLOW_THREADS
927 for (i = 0; i < j; i++) {
928 line = PyList_GET_ITEM(list, i);
929 len = PyString_GET_SIZE(line);
930 BZ2_bzWrite (&bzerror, self->fp,
931 PyString_AS_STRING(line), len);
932 if (bzerror != BZ_OK) {
933 Py_BLOCK_THREADS
934 Util_CatchBZ2Error(bzerror);
935 goto error;
936 }
937 }
938 Py_END_ALLOW_THREADS
939
940 if (j < CHUNKSIZE)
941 break;
942 }
943
944 Py_INCREF(Py_None);
945 ret = Py_None;
946
947 error:
948 RELEASE_LOCK(self);
949 Py_XDECREF(list);
950 Py_XDECREF(iter);
951 return ret;
952#undef CHUNKSIZE
953}
954
955PyDoc_STRVAR(BZ2File_seek__doc__,
956"seek(offset [, whence]) -> None\n\
957\n\
958Move to new file position. Argument offset is a byte count. Optional\n\
959argument whence defaults to 0 (offset from start of file, offset\n\
960should be >= 0); other values are 1 (move relative to current position,\n\
961positive or negative), and 2 (move relative to end of file, usually\n\
962negative, although many platforms allow seeking beyond the end of a file).\n\
963\n\
964Note that seeking of bz2 files is emulated, and depending on the parameters\n\
965the operation may be extremely slow.\n\
966");
967
968static PyObject *
969BZ2File_seek(BZ2FileObject *self, PyObject *args)
970{
971 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000972 PyObject *offobj;
973 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000974 char small_buffer[SMALLCHUNK];
975 char *buffer = small_buffer;
976 size_t buffersize = SMALLCHUNK;
977 int bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000978 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000979 int chunksize;
980 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000981 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000982
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000983 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
984 return NULL;
985#if !defined(HAVE_LARGEFILE_SUPPORT)
986 offset = PyInt_AsLong(offobj);
987#else
988 offset = PyLong_Check(offobj) ?
989 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
990#endif
991 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000992 return NULL;
993
994 ACQUIRE_LOCK(self);
995 Util_DropReadAhead(self);
996 switch (self->mode) {
997 case MODE_READ:
998 case MODE_READ_EOF:
999 break;
Tim Peterse3228092002-11-09 04:21:44 +00001000
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001001 case MODE_CLOSED:
1002 PyErr_SetString(PyExc_ValueError,
1003 "I/O operation on closed file");
1004 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +00001005
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001006 default:
1007 PyErr_SetString(PyExc_IOError,
1008 "seek works only while reading");
1009 goto cleanup;;
1010 }
1011
Georg Brandl47fab922006-02-18 21:57:25 +00001012 if (where == 2) {
1013 if (self->size == -1) {
1014 assert(self->mode != MODE_READ_EOF);
1015 for (;;) {
1016 Py_BEGIN_ALLOW_THREADS
1017 chunksize = Util_UnivNewlineRead(
1018 &bzerror, self->fp,
1019 buffer, buffersize,
1020 self);
1021 self->pos += chunksize;
1022 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001023
Georg Brandl47fab922006-02-18 21:57:25 +00001024 bytesread += chunksize;
1025 if (bzerror == BZ_STREAM_END) {
1026 break;
1027 } else if (bzerror != BZ_OK) {
1028 Util_CatchBZ2Error(bzerror);
1029 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001030 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001031 }
Georg Brandl47fab922006-02-18 21:57:25 +00001032 self->mode = MODE_READ_EOF;
1033 self->size = self->pos;
1034 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001035 }
Georg Brandl47fab922006-02-18 21:57:25 +00001036 offset = self->size + offset;
1037 } else if (where == 1) {
1038 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001039 }
1040
Georg Brandl47fab922006-02-18 21:57:25 +00001041 /* Before getting here, offset must be the absolute position the file
1042 * pointer should be set to. */
1043
1044 if (offset >= self->pos) {
1045 /* we can move forward */
1046 offset -= self->pos;
1047 } else {
1048 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001049 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001050 if (bzerror != BZ_OK) {
1051 Util_CatchBZ2Error(bzerror);
1052 goto cleanup;
1053 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001054 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001055 if (!ret)
1056 goto cleanup;
1057 Py_DECREF(ret);
1058 ret = NULL;
1059 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001060 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001061 0, 0, NULL, 0);
1062 if (bzerror != BZ_OK) {
1063 Util_CatchBZ2Error(bzerror);
1064 goto cleanup;
1065 }
1066 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001067 }
1068
Georg Brandl47fab922006-02-18 21:57:25 +00001069 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 goto exit;
1071
1072 /* Before getting here, offset must be set to the number of bytes
1073 * to walk forward. */
1074 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001075 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001076 readsize = buffersize;
1077 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001078 /* offset might be wider that readsize, but the result
1079 * of the subtraction is bound by buffersize (see the
1080 * condition above). buffersize is 8192. */
1081 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001082 Py_BEGIN_ALLOW_THREADS
1083 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1084 buffer, readsize, self);
1085 self->pos += chunksize;
1086 Py_END_ALLOW_THREADS
1087 bytesread += chunksize;
1088 if (bzerror == BZ_STREAM_END) {
1089 self->size = self->pos;
1090 self->mode = MODE_READ_EOF;
1091 break;
1092 } else if (bzerror != BZ_OK) {
1093 Util_CatchBZ2Error(bzerror);
1094 goto cleanup;
1095 }
1096 if (bytesread == offset)
1097 break;
1098 }
1099
1100exit:
1101 Py_INCREF(Py_None);
1102 ret = Py_None;
1103
1104cleanup:
1105 RELEASE_LOCK(self);
1106 return ret;
1107}
1108
1109PyDoc_STRVAR(BZ2File_tell__doc__,
1110"tell() -> int\n\
1111\n\
1112Return the current file position, an integer (may be a long integer).\n\
1113");
1114
1115static PyObject *
1116BZ2File_tell(BZ2FileObject *self, PyObject *args)
1117{
1118 PyObject *ret = NULL;
1119
1120 if (self->mode == MODE_CLOSED) {
1121 PyErr_SetString(PyExc_ValueError,
1122 "I/O operation on closed file");
1123 goto cleanup;
1124 }
1125
Georg Brandla8bcecc2005-09-03 07:49:53 +00001126#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001127 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001128#else
1129 ret = PyLong_FromLongLong(self->pos);
1130#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001131
1132cleanup:
1133 return ret;
1134}
1135
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001136PyDoc_STRVAR(BZ2File_close__doc__,
1137"close() -> None or (perhaps) an integer\n\
1138\n\
1139Close the file. Sets data attribute .closed to true. A closed file\n\
1140cannot be used for further I/O operations. close() may be called more\n\
1141than once without error.\n\
1142");
1143
1144static PyObject *
1145BZ2File_close(BZ2FileObject *self)
1146{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001147 PyObject *ret = NULL;
1148 int bzerror = BZ_OK;
1149
1150 ACQUIRE_LOCK(self);
1151 switch (self->mode) {
1152 case MODE_READ:
1153 case MODE_READ_EOF:
1154 BZ2_bzReadClose(&bzerror, self->fp);
1155 break;
1156 case MODE_WRITE:
1157 BZ2_bzWriteClose(&bzerror, self->fp,
1158 0, NULL, NULL);
1159 break;
1160 }
1161 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001162 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163 if (bzerror != BZ_OK) {
1164 Util_CatchBZ2Error(bzerror);
1165 Py_XDECREF(ret);
1166 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001167 }
1168
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001169 RELEASE_LOCK(self);
1170 return ret;
1171}
1172
/* Forward declaration; the definition appears later in this file. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table for BZ2File.  Each row is {python name, C function, calling
 * convention, docstring}.  writelines takes its single argument directly
 * (METH_O); tell and close take no arguments (METH_NOARGS). */
static PyMethodDef BZ2File_methods[] = {
	{"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
	{"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
	{"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
	{"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
	{"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
	{"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
	{"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
	{"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
	{NULL, NULL} /* sentinel */
};
1186
1187
1188/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001189/* Getters and setters of BZ2File. */
1190
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001191/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1192static PyObject *
1193BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1194{
1195 switch (self->f_newlinetypes) {
1196 case NEWLINE_UNKNOWN:
1197 Py_INCREF(Py_None);
1198 return Py_None;
1199 case NEWLINE_CR:
1200 return PyString_FromString("\r");
1201 case NEWLINE_LF:
1202 return PyString_FromString("\n");
1203 case NEWLINE_CR|NEWLINE_LF:
1204 return Py_BuildValue("(ss)", "\r", "\n");
1205 case NEWLINE_CRLF:
1206 return PyString_FromString("\r\n");
1207 case NEWLINE_CR|NEWLINE_CRLF:
1208 return Py_BuildValue("(ss)", "\r", "\r\n");
1209 case NEWLINE_LF|NEWLINE_CRLF:
1210 return Py_BuildValue("(ss)", "\n", "\r\n");
1211 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1212 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1213 default:
1214 PyErr_Format(PyExc_SystemError,
1215 "Unknown newlines value 0x%x\n",
1216 self->f_newlinetypes);
1217 return NULL;
1218 }
1219}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001220
1221static PyObject *
1222BZ2File_get_closed(BZ2FileObject *self, void *closure)
1223{
1224 return PyInt_FromLong(self->mode == MODE_CLOSED);
1225}
1226
1227static PyObject *
1228BZ2File_get_mode(BZ2FileObject *self, void *closure)
1229{
1230 return PyObject_GetAttrString(self->file, "mode");
1231}
1232
1233static PyObject *
1234BZ2File_get_name(BZ2FileObject *self, void *closure)
1235{
1236 return PyObject_GetAttrString(self->file, "name");
1237}
1238
/* Computed attributes of BZ2File.  Every entry has a getter and a NULL
 * setter, so all four attributes are read-only. */
static PyGetSetDef BZ2File_getset[] = {
	{"closed", (getter)BZ2File_get_closed, NULL,
			"True if the file is closed"},
	{"newlines", (getter)BZ2File_get_newlines, NULL,
			"end-of-line convention used in this file"},
	{"mode", (getter)BZ2File_get_mode, NULL,
			"file mode ('r', 'w', or 'U')"},
	{"name", (getter)BZ2File_get_name, NULL,
			"file name"},
	{NULL}	/* Sentinel */
};
1250
1251
1252/* ===================================================================== */
1253/* Members of BZ2File_Type. */
1254
/* OFF(x) is the byte offset of field x within BZ2FileObject.  Any earlier
 * definition of OFF is dropped first. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct members exposed directly as attributes: the f_softspace int field
 * is published under the name "softspace". */
static PyMemberDef BZ2File_members[] = {
	{"softspace",	T_INT,		OFF(f_softspace), 0,
	 "flag indicating that a space needs to be printed; used by print"},
	{NULL}	/* Sentinel */
};
1263
1264/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001265/* Slot definitions for BZ2File_Type. */
1266
1267static int
1268BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1269{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001270 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001271 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001272 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001273 char *mode = "r";
1274 int buffering = -1;
1275 int compresslevel = 9;
1276 int bzerror;
1277 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001278
1279 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001280
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001281 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1282 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001283 &compresslevel))
1284 return -1;
1285
1286 if (compresslevel < 1 || compresslevel > 9) {
1287 PyErr_SetString(PyExc_ValueError,
1288 "compresslevel must be between 1 and 9");
1289 return -1;
1290 }
1291
1292 for (;;) {
1293 int error = 0;
1294 switch (*mode) {
1295 case 'r':
1296 case 'w':
1297 if (mode_char)
1298 error = 1;
1299 mode_char = *mode;
1300 break;
1301
1302 case 'b':
1303 break;
1304
1305 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001306 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001307 break;
1308
1309 default:
1310 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001311 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001312 }
1313 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001314 PyErr_Format(PyExc_ValueError,
1315 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001316 return -1;
1317 }
1318 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001319 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001320 break;
1321 }
1322
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001323 if (mode_char == 0) {
1324 mode_char = 'r';
1325 }
1326
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001327 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001328
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001329 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1330 name, mode, buffering);
1331 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001332 return -1;
1333
1334 /* From now on, we have stuff to dealloc, so jump to error label
1335 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001336
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001337#ifdef WITH_THREAD
1338 self->lock = PyThread_allocate_lock();
1339 if (!self->lock)
1340 goto error;
1341#endif
1342
1343 if (mode_char == 'r')
1344 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001345 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001346 0, 0, NULL, 0);
1347 else
1348 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001349 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001350 compresslevel, 0, 0);
1351
1352 if (bzerror != BZ_OK) {
1353 Util_CatchBZ2Error(bzerror);
1354 goto error;
1355 }
1356
1357 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1358
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001359 return 0;
1360
1361error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001362 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001363#ifdef WITH_THREAD
1364 if (self->lock)
1365 PyThread_free_lock(self->lock);
1366#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001367 return -1;
1368}
1369
1370static void
1371BZ2File_dealloc(BZ2FileObject *self)
1372{
1373 int bzerror;
1374#ifdef WITH_THREAD
1375 if (self->lock)
1376 PyThread_free_lock(self->lock);
1377#endif
1378 switch (self->mode) {
1379 case MODE_READ:
1380 case MODE_READ_EOF:
1381 BZ2_bzReadClose(&bzerror, self->fp);
1382 break;
1383 case MODE_WRITE:
1384 BZ2_bzWriteClose(&bzerror, self->fp,
1385 0, NULL, NULL);
1386 break;
1387 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001388 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001389 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001390 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001391}
1392
1393/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1394static PyObject *
1395BZ2File_getiter(BZ2FileObject *self)
1396{
1397 if (self->mode == MODE_CLOSED) {
1398 PyErr_SetString(PyExc_ValueError,
1399 "I/O operation on closed file");
1400 return NULL;
1401 }
1402 Py_INCREF((PyObject*)self);
1403 return (PyObject *)self;
1404}
1405
1406/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1407#define READAHEAD_BUFSIZE 8192
1408static PyObject *
1409BZ2File_iternext(BZ2FileObject *self)
1410{
1411 PyStringObject* ret;
1412 ACQUIRE_LOCK(self);
1413 if (self->mode == MODE_CLOSED) {
1414 PyErr_SetString(PyExc_ValueError,
1415 "I/O operation on closed file");
1416 return NULL;
1417 }
1418 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1419 RELEASE_LOCK(self);
1420 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1421 Py_XDECREF(ret);
1422 return NULL;
1423 }
1424 return (PyObject *)ret;
1425}
1426
1427/* ===================================================================== */
1428/* BZ2File_Type definition. */
1429
/* Class docstring for BZ2File, built from two adjacent PyDoc_STR pieces.
 * NOTE(review): the signature below advertises buffering=0, while
 * BZ2File_init initializes its buffering local to -1 -- confirm which one
 * is intended. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;

/* Type object for bz2.BZ2File.  The slots that are filled in are dealloc,
 * generic attribute get/set, the iterator pair (tp_iter / tp_iternext), the
 * method, member and getset tables, and init/alloc/new/free.  The type can
 * be subclassed (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2File_Type = {
	PyObject_HEAD_INIT(NULL)
	0,                              /*ob_size*/
	"bz2.BZ2File",                  /*tp_name*/
	sizeof(BZ2FileObject),          /*tp_basicsize*/
	0,                              /*tp_itemsize*/
	(destructor)BZ2File_dealloc,    /*tp_dealloc*/
	0,                              /*tp_print*/
	0,                              /*tp_getattr*/
	0,                              /*tp_setattr*/
	0,                              /*tp_compare*/
	0,                              /*tp_repr*/
	0,                              /*tp_as_number*/
	0,                              /*tp_as_sequence*/
	0,                              /*tp_as_mapping*/
	0,                              /*tp_hash*/
	0,                              /*tp_call*/
	0,                              /*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,                              /*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2File__doc__,                 /*tp_doc*/
	0,                              /*tp_traverse*/
	0,                              /*tp_clear*/
	0,                              /*tp_richcompare*/
	0,                              /*tp_weaklistoffset*/
	(getiterfunc)BZ2File_getiter,   /*tp_iter*/
	(iternextfunc)BZ2File_iternext, /*tp_iternext*/
	BZ2File_methods,                /*tp_methods*/
	BZ2File_members,                /*tp_members*/
	BZ2File_getset,                 /*tp_getset*/
	0,                              /*tp_base*/
	0,                              /*tp_dict*/
	0,                              /*tp_descr_get*/
	0,                              /*tp_descr_set*/
	0,                              /*tp_dictoffset*/
	(initproc)BZ2File_init,         /*tp_init*/
	PyType_GenericAlloc,            /*tp_alloc*/
	PyType_GenericNew,              /*tp_new*/
	_PyObject_Del,                  /*tp_free*/
	0,                              /*tp_is_gc*/
};
1494
1495
1496/* ===================================================================== */
1497/* Methods of BZ2Comp. */
1498
1499PyDoc_STRVAR(BZ2Comp_compress__doc__,
1500"compress(data) -> string\n\
1501\n\
1502Provide more data to the compressor object. It will return chunks of\n\
1503compressed data whenever possible. When you've finished providing data\n\
1504to compress, call the flush() method to finish the compression process,\n\
1505and return what is left in the internal buffers.\n\
1506");
1507
1508static PyObject *
1509BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1510{
1511 char *data;
1512 int datasize;
1513 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001514 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001515 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001516 bz_stream *bzs = &self->bzs;
1517 int bzerror;
1518
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001519 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001520 return NULL;
1521
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001522 if (datasize == 0)
1523 return PyString_FromString("");
1524
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001525 ACQUIRE_LOCK(self);
1526 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001527 PyErr_SetString(PyExc_ValueError,
1528 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001529 goto error;
1530 }
1531
1532 ret = PyString_FromStringAndSize(NULL, bufsize);
1533 if (!ret)
1534 goto error;
1535
1536 bzs->next_in = data;
1537 bzs->avail_in = datasize;
1538 bzs->next_out = BUF(ret);
1539 bzs->avail_out = bufsize;
1540
1541 totalout = BZS_TOTAL_OUT(bzs);
1542
1543 for (;;) {
1544 Py_BEGIN_ALLOW_THREADS
1545 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1546 Py_END_ALLOW_THREADS
1547 if (bzerror != BZ_RUN_OK) {
1548 Util_CatchBZ2Error(bzerror);
1549 goto error;
1550 }
1551 if (bzs->avail_out == 0) {
1552 bufsize = Util_NewBufferSize(bufsize);
1553 if (_PyString_Resize(&ret, bufsize) < 0) {
1554 BZ2_bzCompressEnd(bzs);
1555 goto error;
1556 }
1557 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1558 - totalout);
1559 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1560 } else if (bzs->avail_in == 0) {
1561 break;
1562 }
1563 }
1564
Tim Petersf29f0c62002-11-09 04:28:17 +00001565 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001566
1567 RELEASE_LOCK(self);
1568 return ret;
1569
1570error:
1571 RELEASE_LOCK(self);
1572 Py_XDECREF(ret);
1573 return NULL;
1574}
1575
1576PyDoc_STRVAR(BZ2Comp_flush__doc__,
1577"flush() -> string\n\
1578\n\
1579Finish the compression process and return what is left in internal buffers.\n\
1580You must not use the compressor object after calling this method.\n\
1581");
1582
1583static PyObject *
1584BZ2Comp_flush(BZ2CompObject *self)
1585{
1586 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001587 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001588 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001589 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001590 int bzerror;
1591
1592 ACQUIRE_LOCK(self);
1593 if (!self->running) {
1594 PyErr_SetString(PyExc_ValueError, "object was already "
1595 "flushed");
1596 goto error;
1597 }
1598 self->running = 0;
1599
1600 ret = PyString_FromStringAndSize(NULL, bufsize);
1601 if (!ret)
1602 goto error;
1603
1604 bzs->next_out = BUF(ret);
1605 bzs->avail_out = bufsize;
1606
1607 totalout = BZS_TOTAL_OUT(bzs);
1608
1609 for (;;) {
1610 Py_BEGIN_ALLOW_THREADS
1611 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1612 Py_END_ALLOW_THREADS
1613 if (bzerror == BZ_STREAM_END) {
1614 break;
1615 } else if (bzerror != BZ_FINISH_OK) {
1616 Util_CatchBZ2Error(bzerror);
1617 goto error;
1618 }
1619 if (bzs->avail_out == 0) {
1620 bufsize = Util_NewBufferSize(bufsize);
1621 if (_PyString_Resize(&ret, bufsize) < 0)
1622 goto error;
1623 bzs->next_out = BUF(ret);
1624 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1625 - totalout);
1626 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1627 }
1628 }
1629
1630 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001631 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001632
1633 RELEASE_LOCK(self);
1634 return ret;
1635
1636error:
1637 RELEASE_LOCK(self);
1638 Py_XDECREF(ret);
1639 return NULL;
1640}
1641
/* Method table for BZ2Compressor: compress takes an argument tuple,
 * flush takes no arguments. */
static PyMethodDef BZ2Comp_methods[] = {
	{"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
	 BZ2Comp_compress__doc__},
	{"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
	 BZ2Comp_flush__doc__},
	{NULL, NULL} /* sentinel */
};
1649
1650
1651/* ===================================================================== */
1652/* Slot definitions for BZ2Comp_Type. */
1653
1654static int
1655BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1656{
1657 int compresslevel = 9;
1658 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001659 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001660
1661 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1662 kwlist, &compresslevel))
1663 return -1;
1664
1665 if (compresslevel < 1 || compresslevel > 9) {
1666 PyErr_SetString(PyExc_ValueError,
1667 "compresslevel must be between 1 and 9");
1668 goto error;
1669 }
1670
1671#ifdef WITH_THREAD
1672 self->lock = PyThread_allocate_lock();
1673 if (!self->lock)
1674 goto error;
1675#endif
1676
1677 memset(&self->bzs, 0, sizeof(bz_stream));
1678 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1679 if (bzerror != BZ_OK) {
1680 Util_CatchBZ2Error(bzerror);
1681 goto error;
1682 }
1683
1684 self->running = 1;
1685
1686 return 0;
1687error:
1688#ifdef WITH_THREAD
1689 if (self->lock)
1690 PyThread_free_lock(self->lock);
1691#endif
1692 return -1;
1693}
1694
1695static void
1696BZ2Comp_dealloc(BZ2CompObject *self)
1697{
1698#ifdef WITH_THREAD
1699 if (self->lock)
1700 PyThread_free_lock(self->lock);
1701#endif
1702 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001703 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001704}
1705
1706
1707/* ===================================================================== */
1708/* BZ2Comp_Type definition. */
1709
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for bz2.BZ2Compressor.  Only dealloc, generic attribute
 * get/set, the method table and init/alloc/new/free are filled in; there
 * are no members, getsets or iterator slots.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2Comp_Type = {
	PyObject_HEAD_INIT(NULL)
	0,                              /*ob_size*/
	"bz2.BZ2Compressor",            /*tp_name*/
	sizeof(BZ2CompObject),          /*tp_basicsize*/
	0,                              /*tp_itemsize*/
	(destructor)BZ2Comp_dealloc,    /*tp_dealloc*/
	0,                              /*tp_print*/
	0,                              /*tp_getattr*/
	0,                              /*tp_setattr*/
	0,                              /*tp_compare*/
	0,                              /*tp_repr*/
	0,                              /*tp_as_number*/
	0,                              /*tp_as_sequence*/
	0,                              /*tp_as_mapping*/
	0,                              /*tp_hash*/
	0,                              /*tp_call*/
	0,                              /*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,                              /*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2Comp__doc__,                 /*tp_doc*/
	0,                              /*tp_traverse*/
	0,                              /*tp_clear*/
	0,                              /*tp_richcompare*/
	0,                              /*tp_weaklistoffset*/
	0,                              /*tp_iter*/
	0,                              /*tp_iternext*/
	BZ2Comp_methods,                /*tp_methods*/
	0,                              /*tp_members*/
	0,                              /*tp_getset*/
	0,                              /*tp_base*/
	0,                              /*tp_dict*/
	0,                              /*tp_descr_get*/
	0,                              /*tp_descr_set*/
	0,                              /*tp_dictoffset*/
	(initproc)BZ2Comp_init,         /*tp_init*/
	PyType_GenericAlloc,            /*tp_alloc*/
	PyType_GenericNew,              /*tp_new*/
	_PyObject_Del,                  /*tp_free*/
	0,                              /*tp_is_gc*/
};
1762
1763
1764/* ===================================================================== */
1765/* Members of BZ2Decomp. */
1766
/* Redefine OFF(x) so that it now measures offsets within BZ2DecompObject. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Struct members exposed as attributes: unused_data is published as a
 * read-only (RO) object attribute. */
static PyMemberDef BZ2Decomp_members[] = {
	{"unused_data", T_OBJECT, OFF(unused_data), RO},
	{NULL}	/* Sentinel */
};
1774
1775
1776/* ===================================================================== */
1777/* Methods of BZ2Decomp. */
1778
1779PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1780"decompress(data) -> string\n\
1781\n\
1782Provide more data to the decompressor object. It will return chunks\n\
1783of decompressed data whenever possible. If you try to decompress data\n\
1784after the end of stream is found, EOFError will be raised. If any data\n\
1785was found after the end of stream, it'll be ignored and saved in\n\
1786unused_data attribute.\n\
1787");
1788
1789static PyObject *
1790BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1791{
1792 char *data;
1793 int datasize;
1794 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001795 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001796 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001797 bz_stream *bzs = &self->bzs;
1798 int bzerror;
1799
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001800 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001801 return NULL;
1802
1803 ACQUIRE_LOCK(self);
1804 if (!self->running) {
1805 PyErr_SetString(PyExc_EOFError, "end of stream was "
1806 "already found");
1807 goto error;
1808 }
1809
1810 ret = PyString_FromStringAndSize(NULL, bufsize);
1811 if (!ret)
1812 goto error;
1813
1814 bzs->next_in = data;
1815 bzs->avail_in = datasize;
1816 bzs->next_out = BUF(ret);
1817 bzs->avail_out = bufsize;
1818
1819 totalout = BZS_TOTAL_OUT(bzs);
1820
1821 for (;;) {
1822 Py_BEGIN_ALLOW_THREADS
1823 bzerror = BZ2_bzDecompress(bzs);
1824 Py_END_ALLOW_THREADS
1825 if (bzerror == BZ_STREAM_END) {
1826 if (bzs->avail_in != 0) {
1827 Py_DECREF(self->unused_data);
1828 self->unused_data =
1829 PyString_FromStringAndSize(bzs->next_in,
1830 bzs->avail_in);
1831 }
1832 self->running = 0;
1833 break;
1834 }
1835 if (bzerror != BZ_OK) {
1836 Util_CatchBZ2Error(bzerror);
1837 goto error;
1838 }
1839 if (bzs->avail_out == 0) {
1840 bufsize = Util_NewBufferSize(bufsize);
1841 if (_PyString_Resize(&ret, bufsize) < 0) {
1842 BZ2_bzDecompressEnd(bzs);
1843 goto error;
1844 }
1845 bzs->next_out = BUF(ret);
1846 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1847 - totalout);
1848 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1849 } else if (bzs->avail_in == 0) {
1850 break;
1851 }
1852 }
1853
1854 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001855 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001856
1857 RELEASE_LOCK(self);
1858 return ret;
1859
1860error:
1861 RELEASE_LOCK(self);
1862 Py_XDECREF(ret);
1863 return NULL;
1864}
1865
/* Method table for BZ2Decompressor: a single decompress method taking an
 * argument tuple. */
static PyMethodDef BZ2Decomp_methods[] = {
	{"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
	{NULL, NULL} /* sentinel */
};
1870
1871
1872/* ===================================================================== */
1873/* Slot definitions for BZ2Decomp_Type. */
1874
1875static int
1876BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1877{
1878 int bzerror;
1879
1880 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1881 return -1;
1882
1883#ifdef WITH_THREAD
1884 self->lock = PyThread_allocate_lock();
1885 if (!self->lock)
1886 goto error;
1887#endif
1888
1889 self->unused_data = PyString_FromString("");
1890 if (!self->unused_data)
1891 goto error;
1892
1893 memset(&self->bzs, 0, sizeof(bz_stream));
1894 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1895 if (bzerror != BZ_OK) {
1896 Util_CatchBZ2Error(bzerror);
1897 goto error;
1898 }
1899
1900 self->running = 1;
1901
1902 return 0;
1903
1904error:
1905#ifdef WITH_THREAD
1906 if (self->lock)
1907 PyThread_free_lock(self->lock);
1908#endif
1909 Py_XDECREF(self->unused_data);
1910 return -1;
1911}
1912
1913static void
1914BZ2Decomp_dealloc(BZ2DecompObject *self)
1915{
1916#ifdef WITH_THREAD
1917 if (self->lock)
1918 PyThread_free_lock(self->lock);
1919#endif
1920 Py_XDECREF(self->unused_data);
1921 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001922 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001923}
1924
1925
1926/* ===================================================================== */
1927/* BZ2Decomp_Type definition. */
1928
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.  The slots that are filled in are
 * dealloc, generic attribute get/set, the method and member tables, and
 * init/alloc/new/free; there are no getsets or iterator slots.  The type
 * can be subclassed (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2Decomp_Type = {
	PyObject_HEAD_INIT(NULL)
	0,                              /*ob_size*/
	"bz2.BZ2Decompressor",          /*tp_name*/
	sizeof(BZ2DecompObject),        /*tp_basicsize*/
	0,                              /*tp_itemsize*/
	(destructor)BZ2Decomp_dealloc,  /*tp_dealloc*/
	0,                              /*tp_print*/
	0,                              /*tp_getattr*/
	0,                              /*tp_setattr*/
	0,                              /*tp_compare*/
	0,                              /*tp_repr*/
	0,                              /*tp_as_number*/
	0,                              /*tp_as_sequence*/
	0,                              /*tp_as_mapping*/
	0,                              /*tp_hash*/
	0,                              /*tp_call*/
	0,                              /*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,                              /*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2Decomp__doc__,               /*tp_doc*/
	0,                              /*tp_traverse*/
	0,                              /*tp_clear*/
	0,                              /*tp_richcompare*/
	0,                              /*tp_weaklistoffset*/
	0,                              /*tp_iter*/
	0,                              /*tp_iternext*/
	BZ2Decomp_methods,              /*tp_methods*/
	BZ2Decomp_members,              /*tp_members*/
	0,                              /*tp_getset*/
	0,                              /*tp_base*/
	0,                              /*tp_dict*/
	0,                              /*tp_descr_get*/
	0,                              /*tp_descr_set*/
	0,                              /*tp_dictoffset*/
	(initproc)BZ2Decomp_init,       /*tp_init*/
	PyType_GenericAlloc,            /*tp_alloc*/
	PyType_GenericNew,              /*tp_new*/
	_PyObject_Del,                  /*tp_free*/
	0,                              /*tp_is_gc*/
};
1980
1981
1982/* ===================================================================== */
1983/* Module functions. */
1984
1985PyDoc_STRVAR(bz2_compress__doc__,
1986"compress(data [, compresslevel=9]) -> string\n\
1987\n\
1988Compress data in one shot. If you want to compress data sequentially,\n\
1989use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1990given, must be a number between 1 and 9.\n\
1991");
1992
1993static PyObject *
1994bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1995{
1996 int compresslevel=9;
1997 char *data;
1998 int datasize;
1999 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002000 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002001 bz_stream _bzs;
2002 bz_stream *bzs = &_bzs;
2003 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002004 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002005
2006 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2007 kwlist, &data, &datasize,
2008 &compresslevel))
2009 return NULL;
2010
2011 if (compresslevel < 1 || compresslevel > 9) {
2012 PyErr_SetString(PyExc_ValueError,
2013 "compresslevel must be between 1 and 9");
2014 return NULL;
2015 }
2016
2017 /* Conforming to bz2 manual, this is large enough to fit compressed
2018 * data in one shot. We will check it later anyway. */
2019 bufsize = datasize + (datasize/100+1) + 600;
2020
2021 ret = PyString_FromStringAndSize(NULL, bufsize);
2022 if (!ret)
2023 return NULL;
2024
2025 memset(bzs, 0, sizeof(bz_stream));
2026
2027 bzs->next_in = data;
2028 bzs->avail_in = datasize;
2029 bzs->next_out = BUF(ret);
2030 bzs->avail_out = bufsize;
2031
2032 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2033 if (bzerror != BZ_OK) {
2034 Util_CatchBZ2Error(bzerror);
2035 Py_DECREF(ret);
2036 return NULL;
2037 }
Tim Peterse3228092002-11-09 04:21:44 +00002038
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002039 for (;;) {
2040 Py_BEGIN_ALLOW_THREADS
2041 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2042 Py_END_ALLOW_THREADS
2043 if (bzerror == BZ_STREAM_END) {
2044 break;
2045 } else if (bzerror != BZ_FINISH_OK) {
2046 BZ2_bzCompressEnd(bzs);
2047 Util_CatchBZ2Error(bzerror);
2048 Py_DECREF(ret);
2049 return NULL;
2050 }
2051 if (bzs->avail_out == 0) {
2052 bufsize = Util_NewBufferSize(bufsize);
2053 if (_PyString_Resize(&ret, bufsize) < 0) {
2054 BZ2_bzCompressEnd(bzs);
2055 Py_DECREF(ret);
2056 return NULL;
2057 }
2058 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2059 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2060 }
2061 }
2062
2063 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002064 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002065 BZ2_bzCompressEnd(bzs);
2066
2067 return ret;
2068}
2069
2070PyDoc_STRVAR(bz2_decompress__doc__,
2071"decompress(data) -> decompressed data\n\
2072\n\
2073Decompress data in one shot. If you want to decompress data sequentially,\n\
2074use an instance of BZ2Decompressor instead.\n\
2075");
2076
2077static PyObject *
2078bz2_decompress(PyObject *self, PyObject *args)
2079{
2080 char *data;
2081 int datasize;
2082 int bufsize = SMALLCHUNK;
2083 PyObject *ret;
2084 bz_stream _bzs;
2085 bz_stream *bzs = &_bzs;
2086 int bzerror;
2087
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002088 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002089 return NULL;
2090
2091 if (datasize == 0)
2092 return PyString_FromString("");
2093
2094 ret = PyString_FromStringAndSize(NULL, bufsize);
2095 if (!ret)
2096 return NULL;
2097
2098 memset(bzs, 0, sizeof(bz_stream));
2099
2100 bzs->next_in = data;
2101 bzs->avail_in = datasize;
2102 bzs->next_out = BUF(ret);
2103 bzs->avail_out = bufsize;
2104
2105 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2106 if (bzerror != BZ_OK) {
2107 Util_CatchBZ2Error(bzerror);
2108 Py_DECREF(ret);
2109 return NULL;
2110 }
Tim Peterse3228092002-11-09 04:21:44 +00002111
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002112 for (;;) {
2113 Py_BEGIN_ALLOW_THREADS
2114 bzerror = BZ2_bzDecompress(bzs);
2115 Py_END_ALLOW_THREADS
2116 if (bzerror == BZ_STREAM_END) {
2117 break;
2118 } else if (bzerror != BZ_OK) {
2119 BZ2_bzDecompressEnd(bzs);
2120 Util_CatchBZ2Error(bzerror);
2121 Py_DECREF(ret);
2122 return NULL;
2123 }
2124 if (bzs->avail_out == 0) {
2125 bufsize = Util_NewBufferSize(bufsize);
2126 if (_PyString_Resize(&ret, bufsize) < 0) {
2127 BZ2_bzDecompressEnd(bzs);
2128 Py_DECREF(ret);
2129 return NULL;
2130 }
2131 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2132 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2133 } else if (bzs->avail_in == 0) {
2134 BZ2_bzDecompressEnd(bzs);
2135 PyErr_SetString(PyExc_ValueError,
2136 "couldn't find end of stream");
2137 Py_DECREF(ret);
2138 return NULL;
2139 }
2140 }
2141
2142 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002143 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002144 BZ2_bzDecompressEnd(bzs);
2145
2146 return ret;
2147}
2148
2149static PyMethodDef bz2_methods[] = {
2150 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2151 bz2_compress__doc__},
2152 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2153 bz2_decompress__doc__},
2154 {NULL, NULL} /* sentinel */
2155};
2156
2157/* ===================================================================== */
2158/* Initialization function. */
2159
2160PyDoc_STRVAR(bz2__doc__,
2161"The python bz2 module provides a comprehensive interface for\n\
2162the bz2 compression library. It implements a complete file\n\
2163interface, one shot (de)compression functions, and types for\n\
2164sequential (de)compression.\n\
2165");
2166
Neal Norwitz21d896c2003-07-01 20:15:21 +00002167PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002168initbz2(void)
2169{
2170 PyObject *m;
2171
2172 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002173 BZ2Comp_Type.ob_type = &PyType_Type;
2174 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002175
2176 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002177 if (m == NULL)
2178 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002179
2180 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2181
2182 Py_INCREF(&BZ2File_Type);
2183 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2184
2185 Py_INCREF(&BZ2Comp_Type);
2186 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2187
2188 Py_INCREF(&BZ2Decomp_Type);
2189 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2190}