blob: 4547815c5cff81d5c06c9a38604241aba31bead4 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module.  NOTE(review): presumably exported to
 * Python as the module's __author__ attribute; the export code is not
 * visible in this part of the file. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/*
 * Py_off_t: the file-offset type used by this module, 64-bit when the
 * platform can provide one (same selection logic as Objects/fileobject.c).
 *
 * Plain off_t is used when large-file support is not configured, or when
 * off_t itself is already at least 8 bytes wide; otherwise fpos_t is used
 * if it is wide enough.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character buffer of the Python string object `v`.  The argument is
 * parenthesized so that any expression may be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0     /* file closed: I/O raises ValueError */
#define MODE_READ     1     /* open for reading */
#define MODE_READ_EOF 2     /* reading, and end of stream was reached */
#define MODE_WRITE    3     /* open for writing */

/* True when `v` is exactly a BZ2File instance (no subclass check). */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
45
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/*
 * bzlib compatibility layer, keyed on whether <bzlib.h> defines
 * BZ_CONFIG_ERROR.
 *
 * BZS_TOTAL_OUT(bzs) yields the total number of output bytes recorded in
 * the bz_stream pointed to by `bzs`.  Every variant is a fully
 * parenthesized expression without a trailing semicolon, so it can be used
 * inside larger expressions.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Map the BZ2_-prefixed API names used in this file onto the unprefixed
 * names. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking.  With thread support the object's `lock` member is
 * acquired (blocking) and released; without it both macros expand to
 * nothing.  The argument is parenthesized so any pointer expression works. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Bit flags OR-ed together in BZ2FileObject.f_newlinetypes. */
#define NEWLINE_UNKNOWN 0x0     /* no newline has been seen so far */
#define NEWLINE_CR      0x1     /* a lone \r was seen */
#define NEWLINE_LF      0x2     /* a lone \n was seen */
#define NEWLINE_CRLF    0x4     /* a \r\n pair was seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
/* Instance data of a BZ2File object. */
typedef struct {
	PyObject_HEAD
	PyObject *file;		/* Underlying Python file object (seek() calls
				   its "seek" method and PyFile_AsFile() on it) */

	char* f_buf;		/* Allocated readahead buffer */
	char* f_bufend;		/* Points after last occupied position */
	char* f_bufptr;		/* Current buffer position */

	int f_softspace;	/* Flag used by 'print' command */

	int f_univ_newline;	/* Handle any newline convention */
	int f_newlinetypes;	/* Types of newlines seen (NEWLINE_* bits) */
	int f_skipnextlf;	/* Skip next \n */

	BZFILE *fp;		/* bzlib handle passed to BZ2_bzRead/BZ2_bzWrite */
	int mode;		/* One of the MODE_* constants */
	long pos;		/* Byte count advanced by every read and write */
	long size;		/* Set to pos when BZ_STREAM_END is seen; seek()
				   treats -1 as "not known yet" */
#ifdef WITH_THREAD
	PyThread_type_lock lock;	/* Taken by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2FileObject;
119
/* Instance data of a compressor object.  NOTE(review): the code using these
 * fields is outside the visible part of the file; field notes other than the
 * types are assumptions to confirm. */
typedef struct {
	PyObject_HEAD
	bz_stream bzs;		/* bzlib stream state */
	int running;		/* presumably nonzero while the stream is
				   active - TODO confirm */
#ifdef WITH_THREAD
	PyThread_type_lock lock;	/* Taken by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2CompObject;
128
/* Instance data of a decompressor object.  NOTE(review): the code using
 * these fields is outside the visible part of the file; field notes other
 * than the types are assumptions to confirm. */
typedef struct {
	PyObject_HEAD
	bz_stream bzs;		/* bzlib stream state */
	int running;		/* presumably nonzero while the stream is
				   active - TODO confirm */
	PyObject *unused_data;	/* presumably input found after the end of
				   the compressed stream - TODO confirm */
#ifdef WITH_THREAD
	PyThread_type_lock lock;	/* Taken by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* SMALLCHUNK: linear growth step for small buffers; the stdio buffer size,
 * but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK: linear growth step for large buffers; reduced when int is
 * narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK  (512 * 1024)
#else
#define BIGCHUNK  (512 * 32)
#endif

/*
 * This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the next size to grow to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
226
227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000238 int newlinetypes = f->f_newlinetypes;
239 int skipnextlf = f->f_skipnextlf;
240 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000241
242 total_v_size = n > 0 ? n : 100;
243 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
244 if (v == NULL)
245 return NULL;
246
247 buf = BUF(v);
248 end = buf + total_v_size;
249
250 for (;;) {
251 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (univ_newline) {
253 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000254 BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000256 if (bzerror != BZ_OK || buf == end)
257 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000261 /* Seeing a \n here with
262 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000263 * saw a \r before.
264 */
265 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000266 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000267 &c, 1);
268 if (bzerror != BZ_OK)
269 break;
270 } else {
271 newlinetypes |= NEWLINE_CR;
272 }
273 }
274 if (c == '\r') {
275 skipnextlf = 1;
276 c = '\n';
277 } else if ( c == '\n')
278 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (c == '\n') break;
281 }
282 if (bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000285 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000286 BZ2_bzRead(&bzerror, f->fp, &c, 1);
287 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 *buf++ = c;
289 } while (bzerror == BZ_OK && c != '\n' && buf != end);
290 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000291 f->f_newlinetypes = newlinetypes;
292 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000293 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000294 f->size = f->pos;
295 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000296 break;
297 } else if (bzerror != BZ_OK) {
298 Util_CatchBZ2Error(bzerror);
299 Py_DECREF(v);
300 return NULL;
301 }
302 if (c == '\n')
303 break;
304 /* Must be because buf == end */
305 if (n > 0)
306 break;
307 used_v_size = total_v_size;
308 increment = total_v_size >> 2; /* mild exponential growth */
309 total_v_size += increment;
310 if (total_v_size > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "line is longer than a Python string can hold");
313 Py_DECREF(v);
314 return NULL;
315 }
316 if (_PyString_Resize(&v, total_v_size) < 0)
317 return NULL;
318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
320 }
321
322 used_v_size = buf - BUF(v);
323 if (used_v_size != total_v_size)
324 _PyString_Resize(&v, used_v_size);
325 return v;
326}
327
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000328/* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
330size_t
331Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000332 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000333{
334 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000335 int newlinetypes, skipnextlf;
336
337 assert(buf != NULL);
338 assert(stream != NULL);
339
340 if (!f->f_univ_newline)
341 return BZ2_bzRead(bzerror, stream, buf, n);
342
343 newlinetypes = f->f_newlinetypes;
344 skipnextlf = f->f_skipnextlf;
345
346 /* Invariant: n is the number of bytes remaining to be filled
347 * in the buffer.
348 */
349 while (n) {
350 size_t nread;
351 int shortread;
352 char *src = dst;
353
354 nread = BZ2_bzRead(bzerror, stream, dst, n);
355 assert(nread <= n);
356 n -= nread; /* assuming 1 byte out for each in; will adjust */
357 shortread = n != 0; /* true iff EOF or error */
358 while (nread--) {
359 char c = *src++;
360 if (c == '\r') {
361 /* Save as LF and set flag to skip next LF. */
362 *dst++ = '\n';
363 skipnextlf = 1;
364 }
365 else if (skipnextlf && c == '\n') {
366 /* Skip LF, and remember we saw CR LF. */
367 skipnextlf = 0;
368 newlinetypes |= NEWLINE_CRLF;
369 ++n;
370 }
371 else {
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
375 */
376 if (c == '\n')
377 newlinetypes |= NEWLINE_LF;
378 else if (skipnextlf)
379 newlinetypes |= NEWLINE_CR;
380 *dst++ = c;
381 skipnextlf = 0;
382 }
383 }
384 if (shortread) {
385 /* If this is EOF, update type flags. */
386 if (skipnextlf && *bzerror == BZ_STREAM_END)
387 newlinetypes |= NEWLINE_CR;
388 break;
389 }
390 }
391 f->f_newlinetypes = newlinetypes;
392 f->f_skipnextlf = skipnextlf;
393 return dst - buf;
394}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000395
396/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000398Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000400 if (f->f_buf != NULL) {
401 PyMem_Free(f->f_buf);
402 f->f_buf = NULL;
403 }
404}
405
406/* This is a hacked version of Python's fileobject.c:readahead(). */
407static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000408Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409{
410 int chunksize;
411 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412
413 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000414 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000415 return 0;
416 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000417 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000418 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000420 f->f_bufptr = f->f_buf;
421 f->f_bufend = f->f_buf;
422 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000423 }
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000699 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000705 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000722 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000726 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000738 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000781PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782"xreadlines() -> self\n\
783\n\
784For backward compatibility. BZ2File objects now include the performance\n\
785optimizations previously implemented in the xreadlines module.\n\
786");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
803
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000804 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000806
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
Tim Peterse3228092002-11-09 04:21:44 +0000811
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000816
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
821 }
822
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000823 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000829
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
833 }
Tim Peterse3228092002-11-09 04:21:44 +0000834
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000835 Py_INCREF(Py_None);
836 ret = Py_None;
837
838cleanup:
839 RELEASE_LOCK(self);
840 return ret;
841}
842
843PyDoc_STRVAR(BZ2File_writelines__doc__,
844"writelines(sequence_of_strings) -> None\n\
845\n\
846Write the sequence of strings to the file. Note that newlines are not\n\
847added. The sequence can be any iterable object producing strings. This is\n\
848equivalent to calling write() for each string.\n\
849");
850
851/* This is a hacked version of Python's fileobject.c:file_writelines(). */
852static PyObject *
853BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
854{
855#define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
862
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
871 }
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
875 }
876
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
887 }
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
895 }
896 PyList_SetItem(list, j, line);
897 }
898 }
899 if (j == 0)
900 break;
901
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
911 int len;
912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
919 }
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
926 }
927 }
928
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000929 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000930
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
943 }
944 }
945 Py_END_ALLOW_THREADS
946
947 if (j < CHUNKSIZE)
948 break;
949 }
950
951 Py_INCREF(Py_None);
952 ret = Py_None;
953
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959#undef CHUNKSIZE
960}
961
962PyDoc_STRVAR(BZ2File_seek__doc__,
963"seek(offset [, whence]) -> None\n\
964\n\
965Move to new file position. Argument offset is a byte count. Optional\n\
966argument whence defaults to 0 (offset from start of file, offset\n\
967should be >= 0); other values are 1 (move relative to current position,\n\
968positive or negative), and 2 (move relative to end of file, usually\n\
969negative, although many platforms allow seeking beyond the end of a file).\n\
970\n\
971Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972the operation may be extremely slow.\n\
973");
974
975static PyObject *
976BZ2File_seek(BZ2FileObject *self, PyObject *args)
977{
978 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000979 PyObject *offobj;
980 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000981 char small_buffer[SMALLCHUNK];
982 char *buffer = small_buffer;
983 size_t buffersize = SMALLCHUNK;
984 int bytesread = 0;
985 int readsize;
986 int chunksize;
987 int bzerror;
988 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000989 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000990
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000991 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
992 return NULL;
993#if !defined(HAVE_LARGEFILE_SUPPORT)
994 offset = PyInt_AsLong(offobj);
995#else
996 offset = PyLong_Check(offobj) ?
997 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
998#endif
999 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 return NULL;
1001
1002 ACQUIRE_LOCK(self);
1003 Util_DropReadAhead(self);
1004 switch (self->mode) {
1005 case MODE_READ:
1006 case MODE_READ_EOF:
1007 break;
Tim Peterse3228092002-11-09 04:21:44 +00001008
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001009 case MODE_CLOSED:
1010 PyErr_SetString(PyExc_ValueError,
1011 "I/O operation on closed file");
1012 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +00001013
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001014 default:
1015 PyErr_SetString(PyExc_IOError,
1016 "seek works only while reading");
1017 goto cleanup;;
1018 }
1019
1020 if (offset < 0) {
1021 if (where == 1) {
1022 offset = self->pos + offset;
1023 rewind = 1;
1024 } else if (where == 2) {
1025 if (self->size == -1) {
1026 assert(self->mode != MODE_READ_EOF);
1027 for (;;) {
1028 Py_BEGIN_ALLOW_THREADS
1029 chunksize = Util_UnivNewlineRead(
1030 &bzerror, self->fp,
1031 buffer, buffersize,
1032 self);
1033 self->pos += chunksize;
1034 Py_END_ALLOW_THREADS
1035
1036 bytesread += chunksize;
1037 if (bzerror == BZ_STREAM_END) {
1038 break;
1039 } else if (bzerror != BZ_OK) {
1040 Util_CatchBZ2Error(bzerror);
1041 goto cleanup;
1042 }
1043 }
1044 self->mode = MODE_READ_EOF;
1045 self->size = self->pos;
1046 bytesread = 0;
1047 }
1048 offset = self->size + offset;
1049 if (offset >= self->pos)
1050 offset -= self->pos;
1051 else
1052 rewind = 1;
1053 }
1054 if (offset < 0)
1055 offset = 0;
1056 } else if (where == 0) {
1057 if (offset >= self->pos)
1058 offset -= self->pos;
1059 else
1060 rewind = 1;
1061 }
1062
1063 if (rewind) {
1064 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001065 if (bzerror != BZ_OK) {
1066 Util_CatchBZ2Error(bzerror);
1067 goto cleanup;
1068 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001069 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 if (!ret)
1071 goto cleanup;
1072 Py_DECREF(ret);
1073 ret = NULL;
1074 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001075 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001076 0, 0, NULL, 0);
1077 if (bzerror != BZ_OK) {
1078 Util_CatchBZ2Error(bzerror);
1079 goto cleanup;
1080 }
1081 self->mode = MODE_READ;
1082 } else if (self->mode == MODE_READ_EOF) {
1083 goto exit;
1084 }
1085
1086 if (offset == 0)
1087 goto exit;
1088
1089 /* Before getting here, offset must be set to the number of bytes
1090 * to walk forward. */
1091 for (;;) {
Tim Petersa17c0c42002-11-09 04:23:31 +00001092 if ((size_t)offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001093 readsize = buffersize;
1094 else
1095 readsize = offset-bytesread;
1096 Py_BEGIN_ALLOW_THREADS
1097 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1098 buffer, readsize, self);
1099 self->pos += chunksize;
1100 Py_END_ALLOW_THREADS
1101 bytesread += chunksize;
1102 if (bzerror == BZ_STREAM_END) {
1103 self->size = self->pos;
1104 self->mode = MODE_READ_EOF;
1105 break;
1106 } else if (bzerror != BZ_OK) {
1107 Util_CatchBZ2Error(bzerror);
1108 goto cleanup;
1109 }
1110 if (bytesread == offset)
1111 break;
1112 }
1113
1114exit:
1115 Py_INCREF(Py_None);
1116 ret = Py_None;
1117
1118cleanup:
1119 RELEASE_LOCK(self);
1120 return ret;
1121}
1122
1123PyDoc_STRVAR(BZ2File_tell__doc__,
1124"tell() -> int\n\
1125\n\
1126Return the current file position, an integer (may be a long integer).\n\
1127");
1128
1129static PyObject *
1130BZ2File_tell(BZ2FileObject *self, PyObject *args)
1131{
1132 PyObject *ret = NULL;
1133
1134 if (self->mode == MODE_CLOSED) {
1135 PyErr_SetString(PyExc_ValueError,
1136 "I/O operation on closed file");
1137 goto cleanup;
1138 }
1139
1140 ret = PyInt_FromLong(self->pos);
1141
1142cleanup:
1143 return ret;
1144}
1145
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001146PyDoc_STRVAR(BZ2File_close__doc__,
1147"close() -> None or (perhaps) an integer\n\
1148\n\
1149Close the file. Sets data attribute .closed to true. A closed file\n\
1150cannot be used for further I/O operations. close() may be called more\n\
1151than once without error.\n\
1152");
1153
1154static PyObject *
1155BZ2File_close(BZ2FileObject *self)
1156{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001157 PyObject *ret = NULL;
1158 int bzerror = BZ_OK;
1159
1160 ACQUIRE_LOCK(self);
1161 switch (self->mode) {
1162 case MODE_READ:
1163 case MODE_READ_EOF:
1164 BZ2_bzReadClose(&bzerror, self->fp);
1165 break;
1166 case MODE_WRITE:
1167 BZ2_bzWriteClose(&bzerror, self->fp,
1168 0, NULL, NULL);
1169 break;
1170 }
1171 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001172 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001173 if (bzerror != BZ_OK) {
1174 Util_CatchBZ2Error(bzerror);
1175 Py_XDECREF(ret);
1176 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001177 }
1178
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001179 RELEASE_LOCK(self);
1180 return ret;
1181}
1182
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001183static PyObject *BZ2File_getiter(BZ2FileObject *self);
1184
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001185static PyMethodDef BZ2File_methods[] = {
1186 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1187 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1188 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001189 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001190 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1191 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1192 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1193 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001194 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1195 {NULL, NULL} /* sentinel */
1196};
1197
1198
1199/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001200/* Getters and setters of BZ2File. */
1201
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001202/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1203static PyObject *
1204BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1205{
1206 switch (self->f_newlinetypes) {
1207 case NEWLINE_UNKNOWN:
1208 Py_INCREF(Py_None);
1209 return Py_None;
1210 case NEWLINE_CR:
1211 return PyString_FromString("\r");
1212 case NEWLINE_LF:
1213 return PyString_FromString("\n");
1214 case NEWLINE_CR|NEWLINE_LF:
1215 return Py_BuildValue("(ss)", "\r", "\n");
1216 case NEWLINE_CRLF:
1217 return PyString_FromString("\r\n");
1218 case NEWLINE_CR|NEWLINE_CRLF:
1219 return Py_BuildValue("(ss)", "\r", "\r\n");
1220 case NEWLINE_LF|NEWLINE_CRLF:
1221 return Py_BuildValue("(ss)", "\n", "\r\n");
1222 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1223 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1224 default:
1225 PyErr_Format(PyExc_SystemError,
1226 "Unknown newlines value 0x%x\n",
1227 self->f_newlinetypes);
1228 return NULL;
1229 }
1230}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001231
1232static PyObject *
1233BZ2File_get_closed(BZ2FileObject *self, void *closure)
1234{
1235 return PyInt_FromLong(self->mode == MODE_CLOSED);
1236}
1237
1238static PyObject *
1239BZ2File_get_mode(BZ2FileObject *self, void *closure)
1240{
1241 return PyObject_GetAttrString(self->file, "mode");
1242}
1243
1244static PyObject *
1245BZ2File_get_name(BZ2FileObject *self, void *closure)
1246{
1247 return PyObject_GetAttrString(self->file, "name");
1248}
1249
1250static PyGetSetDef BZ2File_getset[] = {
1251 {"closed", (getter)BZ2File_get_closed, NULL,
1252 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001253 {"newlines", (getter)BZ2File_get_newlines, NULL,
1254 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001255 {"mode", (getter)BZ2File_get_mode, NULL,
1256 "file mode ('r', 'w', or 'U')"},
1257 {"name", (getter)BZ2File_get_name, NULL,
1258 "file name"},
1259 {NULL} /* Sentinel */
1260};
1261
1262
1263/* ===================================================================== */
1264/* Members of BZ2File_Type. */
1265
1266#undef OFF
1267#define OFF(x) offsetof(BZ2FileObject, x)
1268
1269static PyMemberDef BZ2File_members[] = {
1270 {"softspace", T_INT, OFF(f_softspace), 0,
1271 "flag indicating that a space needs to be printed; used by print"},
1272 {NULL} /* Sentinel */
1273};
1274
1275/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001276/* Slot definitions for BZ2File_Type. */
1277
1278static int
1279BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1280{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001281 static char *kwlist[] = {"filename", "mode", "buffering",
1282 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001283 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001284 char *mode = "r";
1285 int buffering = -1;
1286 int compresslevel = 9;
1287 int bzerror;
1288 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001289
1290 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001291
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001292 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1293 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001294 &compresslevel))
1295 return -1;
1296
1297 if (compresslevel < 1 || compresslevel > 9) {
1298 PyErr_SetString(PyExc_ValueError,
1299 "compresslevel must be between 1 and 9");
1300 return -1;
1301 }
1302
1303 for (;;) {
1304 int error = 0;
1305 switch (*mode) {
1306 case 'r':
1307 case 'w':
1308 if (mode_char)
1309 error = 1;
1310 mode_char = *mode;
1311 break;
1312
1313 case 'b':
1314 break;
1315
1316 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001317 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001318 break;
1319
1320 default:
1321 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001322 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001323 }
1324 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001325 PyErr_Format(PyExc_ValueError,
1326 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001327 return -1;
1328 }
1329 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001330 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001331 break;
1332 }
1333
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001334 if (mode_char == 0) {
1335 mode_char = 'r';
1336 }
1337
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001338 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001339
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001340 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1341 name, mode, buffering);
1342 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001343 return -1;
1344
1345 /* From now on, we have stuff to dealloc, so jump to error label
1346 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001347
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001348#ifdef WITH_THREAD
1349 self->lock = PyThread_allocate_lock();
1350 if (!self->lock)
1351 goto error;
1352#endif
1353
1354 if (mode_char == 'r')
1355 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001356 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001357 0, 0, NULL, 0);
1358 else
1359 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001360 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001361 compresslevel, 0, 0);
1362
1363 if (bzerror != BZ_OK) {
1364 Util_CatchBZ2Error(bzerror);
1365 goto error;
1366 }
1367
1368 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1369
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001370 return 0;
1371
1372error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001373 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001374#ifdef WITH_THREAD
1375 if (self->lock)
1376 PyThread_free_lock(self->lock);
1377#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001378 return -1;
1379}
1380
1381static void
1382BZ2File_dealloc(BZ2FileObject *self)
1383{
1384 int bzerror;
1385#ifdef WITH_THREAD
1386 if (self->lock)
1387 PyThread_free_lock(self->lock);
1388#endif
1389 switch (self->mode) {
1390 case MODE_READ:
1391 case MODE_READ_EOF:
1392 BZ2_bzReadClose(&bzerror, self->fp);
1393 break;
1394 case MODE_WRITE:
1395 BZ2_bzWriteClose(&bzerror, self->fp,
1396 0, NULL, NULL);
1397 break;
1398 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001399 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001400 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001401 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001402}
1403
1404/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1405static PyObject *
1406BZ2File_getiter(BZ2FileObject *self)
1407{
1408 if (self->mode == MODE_CLOSED) {
1409 PyErr_SetString(PyExc_ValueError,
1410 "I/O operation on closed file");
1411 return NULL;
1412 }
1413 Py_INCREF((PyObject*)self);
1414 return (PyObject *)self;
1415}
1416
1417/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1418#define READAHEAD_BUFSIZE 8192
1419static PyObject *
1420BZ2File_iternext(BZ2FileObject *self)
1421{
1422 PyStringObject* ret;
1423 ACQUIRE_LOCK(self);
1424 if (self->mode == MODE_CLOSED) {
1425 PyErr_SetString(PyExc_ValueError,
1426 "I/O operation on closed file");
1427 return NULL;
1428 }
1429 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1430 RELEASE_LOCK(self);
1431 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1432 Py_XDECREF(ret);
1433 return NULL;
1434 }
1435 return (PyObject *)ret;
1436}
1437
1438/* ===================================================================== */
1439/* BZ2File_Type definition. */
1440
1441PyDoc_VAR(BZ2File__doc__) =
1442PyDoc_STR(
1443"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1444\n\
1445Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1446writing. When opened for writing, the file will be created if it doesn't\n\
1447exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1448unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1449is given, must be a number between 1 and 9.\n\
1450")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001451PyDoc_STR(
1452"\n\
1453Add a 'U' to mode to open the file for input with universal newline\n\
1454support. Any line ending in the input file will be seen as a '\\n' in\n\
1455Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1456for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1457'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1458newlines are available only when reading.\n\
1459")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001460;
1461
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001462static PyTypeObject BZ2File_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001463 PyObject_HEAD_INIT(NULL)
1464 0, /*ob_size*/
1465 "bz2.BZ2File", /*tp_name*/
1466 sizeof(BZ2FileObject), /*tp_basicsize*/
1467 0, /*tp_itemsize*/
1468 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1469 0, /*tp_print*/
1470 0, /*tp_getattr*/
1471 0, /*tp_setattr*/
1472 0, /*tp_compare*/
1473 0, /*tp_repr*/
1474 0, /*tp_as_number*/
1475 0, /*tp_as_sequence*/
1476 0, /*tp_as_mapping*/
1477 0, /*tp_hash*/
1478 0, /*tp_call*/
1479 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001480 PyObject_GenericGetAttr,/*tp_getattro*/
1481 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001482 0, /*tp_as_buffer*/
1483 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1484 BZ2File__doc__, /*tp_doc*/
1485 0, /*tp_traverse*/
1486 0, /*tp_clear*/
1487 0, /*tp_richcompare*/
1488 0, /*tp_weaklistoffset*/
1489 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1490 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1491 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001492 BZ2File_members, /*tp_members*/
1493 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001494 0, /*tp_base*/
1495 0, /*tp_dict*/
1496 0, /*tp_descr_get*/
1497 0, /*tp_descr_set*/
1498 0, /*tp_dictoffset*/
1499 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001500 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001501 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001502 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001503 0, /*tp_is_gc*/
1504};
1505
1506
1507/* ===================================================================== */
1508/* Methods of BZ2Comp. */
1509
1510PyDoc_STRVAR(BZ2Comp_compress__doc__,
1511"compress(data) -> string\n\
1512\n\
1513Provide more data to the compressor object. It will return chunks of\n\
1514compressed data whenever possible. When you've finished providing data\n\
1515to compress, call the flush() method to finish the compression process,\n\
1516and return what is left in the internal buffers.\n\
1517");
1518
1519static PyObject *
1520BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1521{
1522 char *data;
1523 int datasize;
1524 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001525 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001526 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001527 bz_stream *bzs = &self->bzs;
1528 int bzerror;
1529
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001530 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001531 return NULL;
1532
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001533 if (datasize == 0)
1534 return PyString_FromString("");
1535
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001536 ACQUIRE_LOCK(self);
1537 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001538 PyErr_SetString(PyExc_ValueError,
1539 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001540 goto error;
1541 }
1542
1543 ret = PyString_FromStringAndSize(NULL, bufsize);
1544 if (!ret)
1545 goto error;
1546
1547 bzs->next_in = data;
1548 bzs->avail_in = datasize;
1549 bzs->next_out = BUF(ret);
1550 bzs->avail_out = bufsize;
1551
1552 totalout = BZS_TOTAL_OUT(bzs);
1553
1554 for (;;) {
1555 Py_BEGIN_ALLOW_THREADS
1556 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1557 Py_END_ALLOW_THREADS
1558 if (bzerror != BZ_RUN_OK) {
1559 Util_CatchBZ2Error(bzerror);
1560 goto error;
1561 }
1562 if (bzs->avail_out == 0) {
1563 bufsize = Util_NewBufferSize(bufsize);
1564 if (_PyString_Resize(&ret, bufsize) < 0) {
1565 BZ2_bzCompressEnd(bzs);
1566 goto error;
1567 }
1568 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1569 - totalout);
1570 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1571 } else if (bzs->avail_in == 0) {
1572 break;
1573 }
1574 }
1575
Tim Petersf29f0c62002-11-09 04:28:17 +00001576 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001577
1578 RELEASE_LOCK(self);
1579 return ret;
1580
1581error:
1582 RELEASE_LOCK(self);
1583 Py_XDECREF(ret);
1584 return NULL;
1585}
1586
1587PyDoc_STRVAR(BZ2Comp_flush__doc__,
1588"flush() -> string\n\
1589\n\
1590Finish the compression process and return what is left in internal buffers.\n\
1591You must not use the compressor object after calling this method.\n\
1592");
1593
1594static PyObject *
1595BZ2Comp_flush(BZ2CompObject *self)
1596{
1597 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001598 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001599 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001600 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001601 int bzerror;
1602
1603 ACQUIRE_LOCK(self);
1604 if (!self->running) {
1605 PyErr_SetString(PyExc_ValueError, "object was already "
1606 "flushed");
1607 goto error;
1608 }
1609 self->running = 0;
1610
1611 ret = PyString_FromStringAndSize(NULL, bufsize);
1612 if (!ret)
1613 goto error;
1614
1615 bzs->next_out = BUF(ret);
1616 bzs->avail_out = bufsize;
1617
1618 totalout = BZS_TOTAL_OUT(bzs);
1619
1620 for (;;) {
1621 Py_BEGIN_ALLOW_THREADS
1622 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1623 Py_END_ALLOW_THREADS
1624 if (bzerror == BZ_STREAM_END) {
1625 break;
1626 } else if (bzerror != BZ_FINISH_OK) {
1627 Util_CatchBZ2Error(bzerror);
1628 goto error;
1629 }
1630 if (bzs->avail_out == 0) {
1631 bufsize = Util_NewBufferSize(bufsize);
1632 if (_PyString_Resize(&ret, bufsize) < 0)
1633 goto error;
1634 bzs->next_out = BUF(ret);
1635 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1636 - totalout);
1637 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1638 }
1639 }
1640
1641 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001642 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001643
1644 RELEASE_LOCK(self);
1645 return ret;
1646
1647error:
1648 RELEASE_LOCK(self);
1649 Py_XDECREF(ret);
1650 return NULL;
1651}
1652
1653static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001654 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1655 BZ2Comp_compress__doc__},
1656 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1657 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001658 {NULL, NULL} /* sentinel */
1659};
1660
1661
1662/* ===================================================================== */
1663/* Slot definitions for BZ2Comp_Type. */
1664
1665static int
1666BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1667{
1668 int compresslevel = 9;
1669 int bzerror;
1670 static char *kwlist[] = {"compresslevel", 0};
1671
1672 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1673 kwlist, &compresslevel))
1674 return -1;
1675
1676 if (compresslevel < 1 || compresslevel > 9) {
1677 PyErr_SetString(PyExc_ValueError,
1678 "compresslevel must be between 1 and 9");
1679 goto error;
1680 }
1681
1682#ifdef WITH_THREAD
1683 self->lock = PyThread_allocate_lock();
1684 if (!self->lock)
1685 goto error;
1686#endif
1687
1688 memset(&self->bzs, 0, sizeof(bz_stream));
1689 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1690 if (bzerror != BZ_OK) {
1691 Util_CatchBZ2Error(bzerror);
1692 goto error;
1693 }
1694
1695 self->running = 1;
1696
1697 return 0;
1698error:
1699#ifdef WITH_THREAD
1700 if (self->lock)
1701 PyThread_free_lock(self->lock);
1702#endif
1703 return -1;
1704}
1705
1706static void
1707BZ2Comp_dealloc(BZ2CompObject *self)
1708{
1709#ifdef WITH_THREAD
1710 if (self->lock)
1711 PyThread_free_lock(self->lock);
1712#endif
1713 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001714 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001715}
1716
1717
1718/* ===================================================================== */
1719/* BZ2Comp_Type definition. */
1720
1721PyDoc_STRVAR(BZ2Comp__doc__,
1722"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1723\n\
1724Create a new compressor object. This object may be used to compress\n\
1725data sequentially. If you want to compress data in one shot, use the\n\
1726compress() function instead. The compresslevel parameter, if given,\n\
1727must be a number between 1 and 9.\n\
1728");
1729
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001730static PyTypeObject BZ2Comp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001731 PyObject_HEAD_INIT(NULL)
1732 0, /*ob_size*/
1733 "bz2.BZ2Compressor", /*tp_name*/
1734 sizeof(BZ2CompObject), /*tp_basicsize*/
1735 0, /*tp_itemsize*/
1736 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1737 0, /*tp_print*/
1738 0, /*tp_getattr*/
1739 0, /*tp_setattr*/
1740 0, /*tp_compare*/
1741 0, /*tp_repr*/
1742 0, /*tp_as_number*/
1743 0, /*tp_as_sequence*/
1744 0, /*tp_as_mapping*/
1745 0, /*tp_hash*/
1746 0, /*tp_call*/
1747 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001748 PyObject_GenericGetAttr,/*tp_getattro*/
1749 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001750 0, /*tp_as_buffer*/
1751 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1752 BZ2Comp__doc__, /*tp_doc*/
1753 0, /*tp_traverse*/
1754 0, /*tp_clear*/
1755 0, /*tp_richcompare*/
1756 0, /*tp_weaklistoffset*/
1757 0, /*tp_iter*/
1758 0, /*tp_iternext*/
1759 BZ2Comp_methods, /*tp_methods*/
1760 0, /*tp_members*/
1761 0, /*tp_getset*/
1762 0, /*tp_base*/
1763 0, /*tp_dict*/
1764 0, /*tp_descr_get*/
1765 0, /*tp_descr_set*/
1766 0, /*tp_dictoffset*/
1767 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001768 PyType_GenericAlloc, /*tp_alloc*/
1769 PyType_GenericNew, /*tp_new*/
1770 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001771 0, /*tp_is_gc*/
1772};
1773
1774
1775/* ===================================================================== */
1776/* Members of BZ2Decomp. */
1777
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001778#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001779#define OFF(x) offsetof(BZ2DecompObject, x)
1780
1781static PyMemberDef BZ2Decomp_members[] = {
1782 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1783 {NULL} /* Sentinel */
1784};
1785
1786
1787/* ===================================================================== */
1788/* Methods of BZ2Decomp. */
1789
1790PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1791"decompress(data) -> string\n\
1792\n\
1793Provide more data to the decompressor object. It will return chunks\n\
1794of decompressed data whenever possible. If you try to decompress data\n\
1795after the end of stream is found, EOFError will be raised. If any data\n\
1796was found after the end of stream, it'll be ignored and saved in\n\
1797unused_data attribute.\n\
1798");
1799
1800static PyObject *
1801BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1802{
1803 char *data;
1804 int datasize;
1805 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001806 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001807 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001808 bz_stream *bzs = &self->bzs;
1809 int bzerror;
1810
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001811 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001812 return NULL;
1813
1814 ACQUIRE_LOCK(self);
1815 if (!self->running) {
1816 PyErr_SetString(PyExc_EOFError, "end of stream was "
1817 "already found");
1818 goto error;
1819 }
1820
1821 ret = PyString_FromStringAndSize(NULL, bufsize);
1822 if (!ret)
1823 goto error;
1824
1825 bzs->next_in = data;
1826 bzs->avail_in = datasize;
1827 bzs->next_out = BUF(ret);
1828 bzs->avail_out = bufsize;
1829
1830 totalout = BZS_TOTAL_OUT(bzs);
1831
1832 for (;;) {
1833 Py_BEGIN_ALLOW_THREADS
1834 bzerror = BZ2_bzDecompress(bzs);
1835 Py_END_ALLOW_THREADS
1836 if (bzerror == BZ_STREAM_END) {
1837 if (bzs->avail_in != 0) {
1838 Py_DECREF(self->unused_data);
1839 self->unused_data =
1840 PyString_FromStringAndSize(bzs->next_in,
1841 bzs->avail_in);
1842 }
1843 self->running = 0;
1844 break;
1845 }
1846 if (bzerror != BZ_OK) {
1847 Util_CatchBZ2Error(bzerror);
1848 goto error;
1849 }
1850 if (bzs->avail_out == 0) {
1851 bufsize = Util_NewBufferSize(bufsize);
1852 if (_PyString_Resize(&ret, bufsize) < 0) {
1853 BZ2_bzDecompressEnd(bzs);
1854 goto error;
1855 }
1856 bzs->next_out = BUF(ret);
1857 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1858 - totalout);
1859 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1860 } else if (bzs->avail_in == 0) {
1861 break;
1862 }
1863 }
1864
1865 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001866 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001867
1868 RELEASE_LOCK(self);
1869 return ret;
1870
1871error:
1872 RELEASE_LOCK(self);
1873 Py_XDECREF(ret);
1874 return NULL;
1875}
1876
1877static PyMethodDef BZ2Decomp_methods[] = {
1878 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1879 {NULL, NULL} /* sentinel */
1880};
1881
1882
1883/* ===================================================================== */
1884/* Slot definitions for BZ2Decomp_Type. */
1885
1886static int
1887BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1888{
1889 int bzerror;
1890
1891 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1892 return -1;
1893
1894#ifdef WITH_THREAD
1895 self->lock = PyThread_allocate_lock();
1896 if (!self->lock)
1897 goto error;
1898#endif
1899
1900 self->unused_data = PyString_FromString("");
1901 if (!self->unused_data)
1902 goto error;
1903
1904 memset(&self->bzs, 0, sizeof(bz_stream));
1905 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1906 if (bzerror != BZ_OK) {
1907 Util_CatchBZ2Error(bzerror);
1908 goto error;
1909 }
1910
1911 self->running = 1;
1912
1913 return 0;
1914
1915error:
1916#ifdef WITH_THREAD
1917 if (self->lock)
1918 PyThread_free_lock(self->lock);
1919#endif
1920 Py_XDECREF(self->unused_data);
1921 return -1;
1922}
1923
1924static void
1925BZ2Decomp_dealloc(BZ2DecompObject *self)
1926{
1927#ifdef WITH_THREAD
1928 if (self->lock)
1929 PyThread_free_lock(self->lock);
1930#endif
1931 Py_XDECREF(self->unused_data);
1932 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001933 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001934}
1935
1936
1937/* ===================================================================== */
1938/* BZ2Decomp_Type definition. */
1939
1940PyDoc_STRVAR(BZ2Decomp__doc__,
1941"BZ2Decompressor() -> decompressor object\n\
1942\n\
1943Create a new decompressor object. This object may be used to decompress\n\
1944data sequentially. If you want to decompress data in one shot, use the\n\
1945decompress() function instead.\n\
1946");
1947
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001948static PyTypeObject BZ2Decomp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001949 PyObject_HEAD_INIT(NULL)
1950 0, /*ob_size*/
1951 "bz2.BZ2Decompressor", /*tp_name*/
1952 sizeof(BZ2DecompObject), /*tp_basicsize*/
1953 0, /*tp_itemsize*/
1954 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1955 0, /*tp_print*/
1956 0, /*tp_getattr*/
1957 0, /*tp_setattr*/
1958 0, /*tp_compare*/
1959 0, /*tp_repr*/
1960 0, /*tp_as_number*/
1961 0, /*tp_as_sequence*/
1962 0, /*tp_as_mapping*/
1963 0, /*tp_hash*/
1964 0, /*tp_call*/
1965 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001966 PyObject_GenericGetAttr,/*tp_getattro*/
1967 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001968 0, /*tp_as_buffer*/
1969 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1970 BZ2Decomp__doc__, /*tp_doc*/
1971 0, /*tp_traverse*/
1972 0, /*tp_clear*/
1973 0, /*tp_richcompare*/
1974 0, /*tp_weaklistoffset*/
1975 0, /*tp_iter*/
1976 0, /*tp_iternext*/
1977 BZ2Decomp_methods, /*tp_methods*/
1978 BZ2Decomp_members, /*tp_members*/
1979 0, /*tp_getset*/
1980 0, /*tp_base*/
1981 0, /*tp_dict*/
1982 0, /*tp_descr_get*/
1983 0, /*tp_descr_set*/
1984 0, /*tp_dictoffset*/
1985 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001986 PyType_GenericAlloc, /*tp_alloc*/
1987 PyType_GenericNew, /*tp_new*/
1988 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001989 0, /*tp_is_gc*/
1990};
1991
1992
1993/* ===================================================================== */
1994/* Module functions. */
1995
1996PyDoc_STRVAR(bz2_compress__doc__,
1997"compress(data [, compresslevel=9]) -> string\n\
1998\n\
1999Compress data in one shot. If you want to compress data sequentially,\n\
2000use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2001given, must be a number between 1 and 9.\n\
2002");
2003
2004static PyObject *
2005bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2006{
2007 int compresslevel=9;
2008 char *data;
2009 int datasize;
2010 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002011 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002012 bz_stream _bzs;
2013 bz_stream *bzs = &_bzs;
2014 int bzerror;
2015 static char *kwlist[] = {"data", "compresslevel", 0};
2016
2017 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2018 kwlist, &data, &datasize,
2019 &compresslevel))
2020 return NULL;
2021
2022 if (compresslevel < 1 || compresslevel > 9) {
2023 PyErr_SetString(PyExc_ValueError,
2024 "compresslevel must be between 1 and 9");
2025 return NULL;
2026 }
2027
2028 /* Conforming to bz2 manual, this is large enough to fit compressed
2029 * data in one shot. We will check it later anyway. */
2030 bufsize = datasize + (datasize/100+1) + 600;
2031
2032 ret = PyString_FromStringAndSize(NULL, bufsize);
2033 if (!ret)
2034 return NULL;
2035
2036 memset(bzs, 0, sizeof(bz_stream));
2037
2038 bzs->next_in = data;
2039 bzs->avail_in = datasize;
2040 bzs->next_out = BUF(ret);
2041 bzs->avail_out = bufsize;
2042
2043 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2044 if (bzerror != BZ_OK) {
2045 Util_CatchBZ2Error(bzerror);
2046 Py_DECREF(ret);
2047 return NULL;
2048 }
Tim Peterse3228092002-11-09 04:21:44 +00002049
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002050 for (;;) {
2051 Py_BEGIN_ALLOW_THREADS
2052 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2053 Py_END_ALLOW_THREADS
2054 if (bzerror == BZ_STREAM_END) {
2055 break;
2056 } else if (bzerror != BZ_FINISH_OK) {
2057 BZ2_bzCompressEnd(bzs);
2058 Util_CatchBZ2Error(bzerror);
2059 Py_DECREF(ret);
2060 return NULL;
2061 }
2062 if (bzs->avail_out == 0) {
2063 bufsize = Util_NewBufferSize(bufsize);
2064 if (_PyString_Resize(&ret, bufsize) < 0) {
2065 BZ2_bzCompressEnd(bzs);
2066 Py_DECREF(ret);
2067 return NULL;
2068 }
2069 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2070 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2071 }
2072 }
2073
2074 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002075 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002076 BZ2_bzCompressEnd(bzs);
2077
2078 return ret;
2079}
2080
2081PyDoc_STRVAR(bz2_decompress__doc__,
2082"decompress(data) -> decompressed data\n\
2083\n\
2084Decompress data in one shot. If you want to decompress data sequentially,\n\
2085use an instance of BZ2Decompressor instead.\n\
2086");
2087
2088static PyObject *
2089bz2_decompress(PyObject *self, PyObject *args)
2090{
2091 char *data;
2092 int datasize;
2093 int bufsize = SMALLCHUNK;
2094 PyObject *ret;
2095 bz_stream _bzs;
2096 bz_stream *bzs = &_bzs;
2097 int bzerror;
2098
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002099 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002100 return NULL;
2101
2102 if (datasize == 0)
2103 return PyString_FromString("");
2104
2105 ret = PyString_FromStringAndSize(NULL, bufsize);
2106 if (!ret)
2107 return NULL;
2108
2109 memset(bzs, 0, sizeof(bz_stream));
2110
2111 bzs->next_in = data;
2112 bzs->avail_in = datasize;
2113 bzs->next_out = BUF(ret);
2114 bzs->avail_out = bufsize;
2115
2116 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2117 if (bzerror != BZ_OK) {
2118 Util_CatchBZ2Error(bzerror);
2119 Py_DECREF(ret);
2120 return NULL;
2121 }
Tim Peterse3228092002-11-09 04:21:44 +00002122
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002123 for (;;) {
2124 Py_BEGIN_ALLOW_THREADS
2125 bzerror = BZ2_bzDecompress(bzs);
2126 Py_END_ALLOW_THREADS
2127 if (bzerror == BZ_STREAM_END) {
2128 break;
2129 } else if (bzerror != BZ_OK) {
2130 BZ2_bzDecompressEnd(bzs);
2131 Util_CatchBZ2Error(bzerror);
2132 Py_DECREF(ret);
2133 return NULL;
2134 }
2135 if (bzs->avail_out == 0) {
2136 bufsize = Util_NewBufferSize(bufsize);
2137 if (_PyString_Resize(&ret, bufsize) < 0) {
2138 BZ2_bzDecompressEnd(bzs);
2139 Py_DECREF(ret);
2140 return NULL;
2141 }
2142 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2143 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2144 } else if (bzs->avail_in == 0) {
2145 BZ2_bzDecompressEnd(bzs);
2146 PyErr_SetString(PyExc_ValueError,
2147 "couldn't find end of stream");
2148 Py_DECREF(ret);
2149 return NULL;
2150 }
2151 }
2152
2153 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002154 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002155 BZ2_bzDecompressEnd(bzs);
2156
2157 return ret;
2158}
2159
2160static PyMethodDef bz2_methods[] = {
2161 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2162 bz2_compress__doc__},
2163 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2164 bz2_decompress__doc__},
2165 {NULL, NULL} /* sentinel */
2166};
2167
2168/* ===================================================================== */
2169/* Initialization function. */
2170
2171PyDoc_STRVAR(bz2__doc__,
2172"The python bz2 module provides a comprehensive interface for\n\
2173the bz2 compression library. It implements a complete file\n\
2174interface, one shot (de)compression functions, and types for\n\
2175sequential (de)compression.\n\
2176");
2177
Neal Norwitz21d896c2003-07-01 20:15:21 +00002178PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002179initbz2(void)
2180{
2181 PyObject *m;
2182
2183 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002184 BZ2Comp_Type.ob_type = &PyType_Type;
2185 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002186
2187 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2188
2189 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2190
2191 Py_INCREF(&BZ2File_Type);
2192 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2193
2194 Py_INCREF(&BZ2Comp_Type);
2195 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2196
2197 Py_INCREF(&BZ2Decomp_Type);
2198 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2199}