blob: 1fb6665085bb531520f41aee6b07392f99b824d6 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text, kept in a file-local character array. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/* Character buffer of a Python string object.  The argument is
 * parenthesized so that an expression may be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values held in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* True when v's type is exactly BZ2File_Type (subtypes do not match). */
#define BZ2FileObject_Check(v)	((v)->ob_type == &BZ2File_Type)
33
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000034
#ifdef BZ_CONFIG_ERROR

/* bzlib.h defines BZ_CONFIG_ERROR: the BZ2_-prefixed API is used as is and
 * the output byte counter is split into two 32-bit halves.  Combine them
 * into the widest integer type available.  Every expansion is a single
 * parenthesized expression (no trailing semicolon) so it can be used
 * inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
	((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* bzlib.h does not define BZ_CONFIG_ERROR: map the BZ2_-prefixed names
 * used throughout this file onto the unprefixed ones. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
66
67
/* Per-object locking.  With thread support the object's `lock` member is
 * acquired (blocking) and released; without it both macros expand to
 * nothing.  The argument is parenthesized so any pointer expression works. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

#ifdef WITH_UNIVERSAL_NEWLINES
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN	0	/* No newline seen, yet */
#define NEWLINE_CR 1		/* \r newline seen */
#define NEWLINE_LF 2		/* \n newline seen */
#define NEWLINE_CRLF 4		/* \r\n newline seen */
#endif
83
84/* ===================================================================== */
85/* Structure definitions. */
86
/* Per-instance state of a BZ2File object. */
typedef struct {
	PyObject_HEAD
	PyObject *file;		/* Underlying Python file object; seek() is
				   called on it and its FILE* is handed to
				   bzlib when the stream is reopened */

	char* f_buf;		/* Allocated readahead buffer */
	char* f_bufend;		/* Points after last occupied position */
	char* f_bufptr;		/* Current buffer position */

	int f_softspace;	/* Flag used by 'print' command */

#ifdef WITH_UNIVERSAL_NEWLINES
	int f_univ_newline;	/* Handle any newline convention */
	int f_newlinetypes;	/* Types of newlines seen */
	int f_skipnextlf;	/* Skip next \n */
#endif

	BZFILE *fp;		/* bzlib handle passed to BZ2_bzRead/BZ2_bzWrite */
	int mode;		/* One of the MODE_* constants */
	long pos;		/* Bytes read or written so far; reported by tell() */
	long size;		/* Set to pos once end of stream is reached;
				   seek() treats -1 as "not known yet" */
#ifdef WITH_THREAD
	PyThread_type_lock lock;	/* Taken by ACQUIRE_LOCK / RELEASE_LOCK */
#endif
} BZ2FileObject;
111
/* Instance layout built around a bzlib bz_stream.  Going by the type name
 * this is the compressor object; its methods are not visible in this part
 * of the file. */
typedef struct {
	PyObject_HEAD
	bz_stream bzs;		/* bzlib stream state */
	int running;		/* NOTE(review): presumably nonzero while the
				   stream is active -- confirm against the
				   methods that use it */
#ifdef WITH_THREAD
	PyThread_type_lock lock;	/* Taken by ACQUIRE_LOCK / RELEASE_LOCK */
#endif
} BZ2CompObject;
120
/* Instance layout built around a bzlib bz_stream.  Going by the type name
 * this is the decompressor object; its methods are not visible in this part
 * of the file. */
typedef struct {
	PyObject_HEAD
	bz_stream bzs;		/* bzlib stream state */
	int running;		/* NOTE(review): presumably nonzero while the
				   stream is active -- confirm against the
				   methods that use it */
	PyObject *unused_data;	/* NOTE(review): presumably input left over
				   after the end of the compressed stream --
				   confirm */
#ifdef WITH_THREAD
	PyThread_type_lock lock;	/* Taken by ACQUIRE_LOCK / RELEASE_LOCK */
#endif
} BZ2DecompObject;
130
131/* ===================================================================== */
132/* Utility functions. */
133
134static int
135Util_CatchBZ2Error(int bzerror)
136{
137 int ret = 0;
138 switch(bzerror) {
139 case BZ_OK:
140 case BZ_STREAM_END:
141 break;
142
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000143#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000144 case BZ_CONFIG_ERROR:
145 PyErr_SetString(PyExc_SystemError,
146 "the bz2 library was not compiled "
147 "correctly");
148 ret = 1;
149 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000150#endif
Tim Peterse3228092002-11-09 04:21:44 +0000151
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_PARAM_ERROR:
153 PyErr_SetString(PyExc_ValueError,
154 "the bz2 library has received wrong "
155 "parameters");
156 ret = 1;
157 break;
Tim Peterse3228092002-11-09 04:21:44 +0000158
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000159 case BZ_MEM_ERROR:
160 PyErr_NoMemory();
161 ret = 1;
162 break;
163
164 case BZ_DATA_ERROR:
165 case BZ_DATA_ERROR_MAGIC:
166 PyErr_SetString(PyExc_IOError, "invalid data stream");
167 ret = 1;
168 break;
169
170 case BZ_IO_ERROR:
171 PyErr_SetString(PyExc_IOError, "unknown IO error");
172 ret = 1;
173 break;
174
175 case BZ_UNEXPECTED_EOF:
176 PyErr_SetString(PyExc_EOFError,
177 "compressed file ended before the "
178 "logical end-of-stream was detected");
179 ret = 1;
180 break;
181
182 case BZ_SEQUENCE_ERROR:
183 PyErr_SetString(PyExc_RuntimeError,
184 "wrong sequence of bz2 library "
185 "commands used");
186 ret = 1;
187 break;
188 }
189 return ret;
190}
191
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current size of a read buffer, return the size to grow it to:
 * add SMALLCHUNK while the buffer is no larger than SMALLCHUNK, double it
 * while it is no larger than BIGCHUNK, and add BIGCHUNK after that.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	size_t step;

	if (currentsize <= SMALLCHUNK)
		step = SMALLCHUNK;
	else if (currentsize <= BIGCHUNK)
		step = currentsize;
	else
		step = BIGCHUNK;

	return currentsize + step;
}
218
219/* This is a hacked version of Python's fileobject.c:get_line(). */
220static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000221Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000222{
223 char c;
224 char *buf, *end;
225 size_t total_v_size; /* total # of slots in buffer */
226 size_t used_v_size; /* # used slots in buffer */
227 size_t increment; /* amount to increment the buffer */
228 PyObject *v;
229 int bzerror;
230#ifdef WITH_UNIVERSAL_NEWLINES
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000231 int newlinetypes = f->f_newlinetypes;
232 int skipnextlf = f->f_skipnextlf;
233 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000234#endif
235
236 total_v_size = n > 0 ? n : 100;
237 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
238 if (v == NULL)
239 return NULL;
240
241 buf = BUF(v);
242 end = buf + total_v_size;
243
244 for (;;) {
245 Py_BEGIN_ALLOW_THREADS
246#ifdef WITH_UNIVERSAL_NEWLINES
247 if (univ_newline) {
248 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000249 BZ2_bzRead(&bzerror, f->fp, &c, 1);
250 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251 if (bzerror != BZ_OK || buf == end)
252 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000253 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000254 skipnextlf = 0;
255 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000256 /* Seeing a \n here with
257 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000258 * saw a \r before.
259 */
260 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000261 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262 &c, 1);
263 if (bzerror != BZ_OK)
264 break;
265 } else {
266 newlinetypes |= NEWLINE_CR;
267 }
268 }
269 if (c == '\r') {
270 skipnextlf = 1;
271 c = '\n';
272 } else if ( c == '\n')
273 newlinetypes |= NEWLINE_LF;
274 *buf++ = c;
275 if (c == '\n') break;
276 }
277 if (bzerror == BZ_STREAM_END && skipnextlf)
278 newlinetypes |= NEWLINE_CR;
279 } else /* If not universal newlines use the normal loop */
280#endif
281 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000282 BZ2_bzRead(&bzerror, f->fp, &c, 1);
283 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 *buf++ = c;
285 } while (bzerror == BZ_OK && c != '\n' && buf != end);
286 Py_END_ALLOW_THREADS
287#ifdef WITH_UNIVERSAL_NEWLINES
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000288 f->f_newlinetypes = newlinetypes;
289 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000290#endif
291 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000292 f->size = f->pos;
293 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294 break;
295 } else if (bzerror != BZ_OK) {
296 Util_CatchBZ2Error(bzerror);
297 Py_DECREF(v);
298 return NULL;
299 }
300 if (c == '\n')
301 break;
302 /* Must be because buf == end */
303 if (n > 0)
304 break;
305 used_v_size = total_v_size;
306 increment = total_v_size >> 2; /* mild exponential growth */
307 total_v_size += increment;
308 if (total_v_size > INT_MAX) {
309 PyErr_SetString(PyExc_OverflowError,
310 "line is longer than a Python string can hold");
311 Py_DECREF(v);
312 return NULL;
313 }
314 if (_PyString_Resize(&v, total_v_size) < 0)
315 return NULL;
316 buf = BUF(v) + used_v_size;
317 end = BUF(v) + total_v_size;
318 }
319
320 used_v_size = buf - BUF(v);
321 if (used_v_size != total_v_size)
322 _PyString_Resize(&v, used_v_size);
323 return v;
324}
325
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* This is a hacked version of Python's
 * fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n bytes from stream into buf and return the number of bytes
 * stored.  *bzerror receives the status of the last BZ2_bzRead() call.
 * When f->f_univ_newline is set, "\r" and "\r\n" are stored as "\n" and the
 * newline kinds seen are recorded in f->f_newlinetypes; a pending "\r" is
 * carried across calls in f->f_skipnextlf.
 *
 * Collapsing "\r\n" frees a slot in buf, so the loop normally reads again
 * to fill it.  It must not do so once bzlib has reported anything other
 * than BZ_OK: reading from a finished stream would replace BZ_STREAM_END
 * with an error code.
 */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
		     char* buf, size_t n, BZ2FileObject *f)
{
	char *dst = buf;
	int newlinetypes, skipnextlf;

	assert(buf != NULL);
	assert(stream != NULL);

	if (!f->f_univ_newline)
		return BZ2_bzRead(bzerror, stream, buf, n);

	newlinetypes = f->f_newlinetypes;
	skipnextlf = f->f_skipnextlf;

	/* Invariant:  n is the number of bytes remaining to be filled
	 * in the buffer.
	 */
	while (n) {
		size_t nread;
		int shortread;
		char *src = dst;

		nread = BZ2_bzRead(bzerror, stream, dst, n);
		assert(nread <= n);
		n -= nread; /* assuming 1 byte out for each in; will adjust */
		/* true iff EOF or error, including a stream that ended
		 * exactly when the buffer became full */
		shortread = n != 0 || *bzerror != BZ_OK;
		while (nread--) {
			char c = *src++;
			if (c == '\r') {
				/* Save as LF and set flag to skip next LF. */
				*dst++ = '\n';
				skipnextlf = 1;
			}
			else if (skipnextlf && c == '\n') {
				/* Skip LF, and remember we saw CR LF. */
				skipnextlf = 0;
				newlinetypes |= NEWLINE_CRLF;
				++n;
			}
			else {
				/* Normal char to be stored in buffer.  Also
				 * update the newlinetypes flag if either this
				 * is an LF or the previous char was a CR.
				 */
				if (c == '\n')
					newlinetypes |= NEWLINE_LF;
				else if (skipnextlf)
					newlinetypes |= NEWLINE_CR;
				*dst++ = c;
				skipnextlf = 0;
			}
		}
		if (shortread) {
			/* If this is EOF, update type flags. */
			if (skipnextlf && *bzerror == BZ_STREAM_END)
				newlinetypes |= NEWLINE_CR;
			break;
		}
	}
	f->f_newlinetypes = newlinetypes;
	f->f_skipnextlf = skipnextlf;
	return dst - buf;
}
#endif
397
398/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
399static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000400Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000401{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000402 if (f->f_buf != NULL) {
403 PyMem_Free(f->f_buf);
404 f->f_buf = NULL;
405 }
406}
407
408/* This is a hacked version of Python's fileobject.c:readahead(). */
409static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000410Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000411{
412 int chunksize;
413 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000414
415 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000416 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000417 return 0;
418 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000420 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000421 if (f->mode == MODE_READ_EOF) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000422 return -1;
423 }
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
699 if (p == NULL) {
700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
705 "line is longer than a Python string can hold");
706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
722 continue;
723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
726 do {
727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
738 } while (p != NULL);
739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
/* Docstring text for the xreadlines() method of BZ2File objects. */
PyDoc_STRVAR(BZ2File_xreadlines__doc__,
"xreadlines() -> self\n\
\n\
For backward compatibility. BZ2File objects now include the performance\n\
optimizations previously implemented in the xreadlines module.\n\
");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
803
804 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
805 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000806
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
Tim Peterse3228092002-11-09 04:21:44 +0000811
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000816
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
821 }
822
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000823 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000829
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
833 }
Tim Peterse3228092002-11-09 04:21:44 +0000834
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000835 Py_INCREF(Py_None);
836 ret = Py_None;
837
838cleanup:
839 RELEASE_LOCK(self);
840 return ret;
841}
842
843PyDoc_STRVAR(BZ2File_writelines__doc__,
844"writelines(sequence_of_strings) -> None\n\
845\n\
846Write the sequence of strings to the file. Note that newlines are not\n\
847added. The sequence can be any iterable object producing strings. This is\n\
848equivalent to calling write() for each string.\n\
849");
850
851/* This is a hacked version of Python's fileobject.c:file_writelines(). */
852static PyObject *
853BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
854{
855#define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
862
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
871 }
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
875 }
876
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
887 }
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
895 }
896 PyList_SetItem(list, j, line);
897 }
898 }
899 if (j == 0)
900 break;
901
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
911 int len;
912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
919 }
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
926 }
927 }
928
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000929 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000930
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
943 }
944 }
945 Py_END_ALLOW_THREADS
946
947 if (j < CHUNKSIZE)
948 break;
949 }
950
951 Py_INCREF(Py_None);
952 ret = Py_None;
953
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959#undef CHUNKSIZE
960}
961
962PyDoc_STRVAR(BZ2File_seek__doc__,
963"seek(offset [, whence]) -> None\n\
964\n\
965Move to new file position. Argument offset is a byte count. Optional\n\
966argument whence defaults to 0 (offset from start of file, offset\n\
967should be >= 0); other values are 1 (move relative to current position,\n\
968positive or negative), and 2 (move relative to end of file, usually\n\
969negative, although many platforms allow seeking beyond the end of a file).\n\
970\n\
971Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972the operation may be extremely slow.\n\
973");
974
975static PyObject *
976BZ2File_seek(BZ2FileObject *self, PyObject *args)
977{
978 int where = 0;
979 long offset;
980 char small_buffer[SMALLCHUNK];
981 char *buffer = small_buffer;
982 size_t buffersize = SMALLCHUNK;
983 int bytesread = 0;
984 int readsize;
985 int chunksize;
986 int bzerror;
987 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000988 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000989
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000990 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
991 return NULL;
992
993 ACQUIRE_LOCK(self);
994 Util_DropReadAhead(self);
995 switch (self->mode) {
996 case MODE_READ:
997 case MODE_READ_EOF:
998 break;
Tim Peterse3228092002-11-09 04:21:44 +0000999
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 case MODE_CLOSED:
1001 PyErr_SetString(PyExc_ValueError,
1002 "I/O operation on closed file");
1003 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +00001004
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001005 default:
1006 PyErr_SetString(PyExc_IOError,
1007 "seek works only while reading");
1008 goto cleanup;;
1009 }
1010
1011 if (offset < 0) {
1012 if (where == 1) {
1013 offset = self->pos + offset;
1014 rewind = 1;
1015 } else if (where == 2) {
1016 if (self->size == -1) {
1017 assert(self->mode != MODE_READ_EOF);
1018 for (;;) {
1019 Py_BEGIN_ALLOW_THREADS
1020 chunksize = Util_UnivNewlineRead(
1021 &bzerror, self->fp,
1022 buffer, buffersize,
1023 self);
1024 self->pos += chunksize;
1025 Py_END_ALLOW_THREADS
1026
1027 bytesread += chunksize;
1028 if (bzerror == BZ_STREAM_END) {
1029 break;
1030 } else if (bzerror != BZ_OK) {
1031 Util_CatchBZ2Error(bzerror);
1032 goto cleanup;
1033 }
1034 }
1035 self->mode = MODE_READ_EOF;
1036 self->size = self->pos;
1037 bytesread = 0;
1038 }
1039 offset = self->size + offset;
1040 if (offset >= self->pos)
1041 offset -= self->pos;
1042 else
1043 rewind = 1;
1044 }
1045 if (offset < 0)
1046 offset = 0;
1047 } else if (where == 0) {
1048 if (offset >= self->pos)
1049 offset -= self->pos;
1050 else
1051 rewind = 1;
1052 }
1053
1054 if (rewind) {
1055 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 if (bzerror != BZ_OK) {
1057 Util_CatchBZ2Error(bzerror);
1058 goto cleanup;
1059 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001060 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001061 if (!ret)
1062 goto cleanup;
1063 Py_DECREF(ret);
1064 ret = NULL;
1065 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001066 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001067 0, 0, NULL, 0);
1068 if (bzerror != BZ_OK) {
1069 Util_CatchBZ2Error(bzerror);
1070 goto cleanup;
1071 }
1072 self->mode = MODE_READ;
1073 } else if (self->mode == MODE_READ_EOF) {
1074 goto exit;
1075 }
1076
1077 if (offset == 0)
1078 goto exit;
1079
1080 /* Before getting here, offset must be set to the number of bytes
1081 * to walk forward. */
1082 for (;;) {
Tim Petersa17c0c42002-11-09 04:23:31 +00001083 if ((size_t)offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001084 readsize = buffersize;
1085 else
1086 readsize = offset-bytesread;
1087 Py_BEGIN_ALLOW_THREADS
1088 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1089 buffer, readsize, self);
1090 self->pos += chunksize;
1091 Py_END_ALLOW_THREADS
1092 bytesread += chunksize;
1093 if (bzerror == BZ_STREAM_END) {
1094 self->size = self->pos;
1095 self->mode = MODE_READ_EOF;
1096 break;
1097 } else if (bzerror != BZ_OK) {
1098 Util_CatchBZ2Error(bzerror);
1099 goto cleanup;
1100 }
1101 if (bytesread == offset)
1102 break;
1103 }
1104
1105exit:
1106 Py_INCREF(Py_None);
1107 ret = Py_None;
1108
1109cleanup:
1110 RELEASE_LOCK(self);
1111 return ret;
1112}
1113
1114PyDoc_STRVAR(BZ2File_tell__doc__,
1115"tell() -> int\n\
1116\n\
1117Return the current file position, an integer (may be a long integer).\n\
1118");
1119
1120static PyObject *
1121BZ2File_tell(BZ2FileObject *self, PyObject *args)
1122{
1123 PyObject *ret = NULL;
1124
1125 if (self->mode == MODE_CLOSED) {
1126 PyErr_SetString(PyExc_ValueError,
1127 "I/O operation on closed file");
1128 goto cleanup;
1129 }
1130
1131 ret = PyInt_FromLong(self->pos);
1132
1133cleanup:
1134 return ret;
1135}
1136
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001137PyDoc_STRVAR(BZ2File_close__doc__,
1138"close() -> None or (perhaps) an integer\n\
1139\n\
1140Close the file. Sets data attribute .closed to true. A closed file\n\
1141cannot be used for further I/O operations. close() may be called more\n\
1142than once without error.\n\
1143");
1144
1145static PyObject *
1146BZ2File_close(BZ2FileObject *self)
1147{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001148 PyObject *ret = NULL;
1149 int bzerror = BZ_OK;
1150
1151 ACQUIRE_LOCK(self);
1152 switch (self->mode) {
1153 case MODE_READ:
1154 case MODE_READ_EOF:
1155 BZ2_bzReadClose(&bzerror, self->fp);
1156 break;
1157 case MODE_WRITE:
1158 BZ2_bzWriteClose(&bzerror, self->fp,
1159 0, NULL, NULL);
1160 break;
1161 }
1162 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001163 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001164 if (bzerror != BZ_OK) {
1165 Util_CatchBZ2Error(bzerror);
1166 Py_XDECREF(ret);
1167 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001168 }
1169
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001170 RELEASE_LOCK(self);
1171 return ret;
1172}
1173
/* Forward declaration: BZ2File_getiter is defined further down in this
 * file but is referenced by the "xreadlines" entry below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table installed as tp_methods of BZ2File_Type.  Each entry is
 * {python name, C implementation, calling convention, docstring}. */
static PyMethodDef BZ2File_methods[] = {
	{"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
	{"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
	{"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
	{"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
	{"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
	{"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
	{"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
	{"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
	{"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
	{NULL, NULL} /* sentinel */
};
1188
1189
1190/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001191/* Getters and setters of BZ2File. */
1192
#ifdef WITH_UNIVERSAL_NEWLINES
/* Getter for the "newlines" attribute; adapted from Python's
 * fileobject.c:get_newlines().
 *
 * Translates the NEWLINE_* bit combination stored in self->f_newlinetypes
 * into None (nothing seen yet), a single string, or a tuple of strings.
 * Any other value raises SystemError. */
static PyObject *
BZ2File_get_newlines(BZ2FileObject *self, void *closure)
{
	if (self->f_newlinetypes == NEWLINE_UNKNOWN) {
		Py_INCREF(Py_None);
		return Py_None;
	}
	if (self->f_newlinetypes == NEWLINE_CR)
		return PyString_FromString("\r");
	if (self->f_newlinetypes == NEWLINE_LF)
		return PyString_FromString("\n");
	if (self->f_newlinetypes == (NEWLINE_CR|NEWLINE_LF))
		return Py_BuildValue("(ss)", "\r", "\n");
	if (self->f_newlinetypes == NEWLINE_CRLF)
		return PyString_FromString("\r\n");
	if (self->f_newlinetypes == (NEWLINE_CR|NEWLINE_CRLF))
		return Py_BuildValue("(ss)", "\r", "\r\n");
	if (self->f_newlinetypes == (NEWLINE_LF|NEWLINE_CRLF))
		return Py_BuildValue("(ss)", "\n", "\r\n");
	if (self->f_newlinetypes == (NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF))
		return Py_BuildValue("(sss)", "\r", "\n", "\r\n");

	PyErr_Format(PyExc_SystemError,
		     "Unknown newlines value 0x%x\n",
		     self->f_newlinetypes);
	return NULL;
}
#endif
1224
1225static PyObject *
1226BZ2File_get_closed(BZ2FileObject *self, void *closure)
1227{
1228 return PyInt_FromLong(self->mode == MODE_CLOSED);
1229}
1230
1231static PyObject *
1232BZ2File_get_mode(BZ2FileObject *self, void *closure)
1233{
1234 return PyObject_GetAttrString(self->file, "mode");
1235}
1236
1237static PyObject *
1238BZ2File_get_name(BZ2FileObject *self, void *closure)
1239{
1240 return PyObject_GetAttrString(self->file, "name");
1241}
1242
/* Computed attributes of BZ2File, installed as tp_getset of BZ2File_Type.
 * Every entry has a NULL setter, so none of them can be assigned to.
 * "newlines" exists only when built with WITH_UNIVERSAL_NEWLINES. */
static PyGetSetDef BZ2File_getset[] = {
	{"closed", (getter)BZ2File_get_closed, NULL,
			"True if the file is closed"},
#ifdef WITH_UNIVERSAL_NEWLINES
	{"newlines", (getter)BZ2File_get_newlines, NULL,
			"end-of-line convention used in this file"},
#endif
	{"mode", (getter)BZ2File_get_mode, NULL,
			"file mode ('r', 'w', or 'U')"},
	{"name", (getter)BZ2File_get_name, NULL,
			"file name"},
	{NULL}	/* Sentinel */
};
1256
1257
1258/* ===================================================================== */
1259/* Members of BZ2File_Type. */
1260
/* OFF(x) is the byte offset of field x inside BZ2FileObject; it is
 * redefined here so the member table below refers to the right struct. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields exposed directly as Python attributes (tp_members). */
static PyMemberDef BZ2File_members[] = {
	{"softspace",	T_INT,		OFF(f_softspace), 0,
	 "flag indicating that a space needs to be printed; used by print"},
	{NULL}	/* Sentinel */
};
1269
1270/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001271/* Slot definitions for BZ2File_Type. */
1272
1273static int
1274BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1275{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001276 static char *kwlist[] = {"filename", "mode", "buffering",
1277 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001278 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001279 char *mode = "r";
1280 int buffering = -1;
1281 int compresslevel = 9;
1282 int bzerror;
1283 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001284
1285 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001286
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001287 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1288 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001289 &compresslevel))
1290 return -1;
1291
1292 if (compresslevel < 1 || compresslevel > 9) {
1293 PyErr_SetString(PyExc_ValueError,
1294 "compresslevel must be between 1 and 9");
1295 return -1;
1296 }
1297
1298 for (;;) {
1299 int error = 0;
1300 switch (*mode) {
1301 case 'r':
1302 case 'w':
1303 if (mode_char)
1304 error = 1;
1305 mode_char = *mode;
1306 break;
1307
1308 case 'b':
1309 break;
1310
1311 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001312 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001313 break;
1314
1315 default:
1316 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001317 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001318 }
1319 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001320 PyErr_Format(PyExc_ValueError,
1321 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001322 return -1;
1323 }
1324 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001325 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001326 break;
1327 }
1328
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001329 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001330
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001331 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1332 name, mode, buffering);
1333 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001334 return -1;
1335
1336 /* From now on, we have stuff to dealloc, so jump to error label
1337 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001338
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001339#ifdef WITH_THREAD
1340 self->lock = PyThread_allocate_lock();
1341 if (!self->lock)
1342 goto error;
1343#endif
1344
1345 if (mode_char == 'r')
1346 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001347 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001348 0, 0, NULL, 0);
1349 else
1350 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001351 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001352 compresslevel, 0, 0);
1353
1354 if (bzerror != BZ_OK) {
1355 Util_CatchBZ2Error(bzerror);
1356 goto error;
1357 }
1358
1359 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1360
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001361 return 0;
1362
1363error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001364 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001365#ifdef WITH_THREAD
1366 if (self->lock)
1367 PyThread_free_lock(self->lock);
1368#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001369 return -1;
1370}
1371
1372static void
1373BZ2File_dealloc(BZ2FileObject *self)
1374{
1375 int bzerror;
1376#ifdef WITH_THREAD
1377 if (self->lock)
1378 PyThread_free_lock(self->lock);
1379#endif
1380 switch (self->mode) {
1381 case MODE_READ:
1382 case MODE_READ_EOF:
1383 BZ2_bzReadClose(&bzerror, self->fp);
1384 break;
1385 case MODE_WRITE:
1386 BZ2_bzWriteClose(&bzerror, self->fp,
1387 0, NULL, NULL);
1388 break;
1389 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001390 Util_DropReadAhead(self);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001391 Py_DECREF(self->file);
1392 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001393}
1394
1395/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1396static PyObject *
1397BZ2File_getiter(BZ2FileObject *self)
1398{
1399 if (self->mode == MODE_CLOSED) {
1400 PyErr_SetString(PyExc_ValueError,
1401 "I/O operation on closed file");
1402 return NULL;
1403 }
1404 Py_INCREF((PyObject*)self);
1405 return (PyObject *)self;
1406}
1407
1408/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1409#define READAHEAD_BUFSIZE 8192
1410static PyObject *
1411BZ2File_iternext(BZ2FileObject *self)
1412{
1413 PyStringObject* ret;
1414 ACQUIRE_LOCK(self);
1415 if (self->mode == MODE_CLOSED) {
1416 PyErr_SetString(PyExc_ValueError,
1417 "I/O operation on closed file");
1418 return NULL;
1419 }
1420 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1421 RELEASE_LOCK(self);
1422 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1423 Py_XDECREF(ret);
1424 return NULL;
1425 }
1426 return (PyObject *)ret;
1427}
1428
1429/* ===================================================================== */
1430/* BZ2File_Type definition. */
1431
1432PyDoc_VAR(BZ2File__doc__) =
1433PyDoc_STR(
1434"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1435\n\
1436Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1437writing. When opened for writing, the file will be created if it doesn't\n\
1438exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1439unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1440is given, must be a number between 1 and 9.\n\
1441")
1442#ifdef WITH_UNIVERSAL_NEWLINES
1443PyDoc_STR(
1444"\n\
1445Add a 'U' to mode to open the file for input with universal newline\n\
1446support. Any line ending in the input file will be seen as a '\\n' in\n\
1447Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1448for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1449'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1450newlines are available only when reading.\n\
1451")
1452#endif
1453;
1454
/* Type object for bz2.BZ2File.  Slots set to 0 are left unimplemented;
 * the type is subclassable (Py_TPFLAGS_BASETYPE) and acts as its own
 * iterator through tp_iter/tp_iternext. */
static PyTypeObject BZ2File_Type = {
	PyObject_HEAD_INIT(NULL)
	0,			/*ob_size*/
	"bz2.BZ2File",		/*tp_name*/
	sizeof(BZ2FileObject),	/*tp_basicsize*/
	0,			/*tp_itemsize*/
	(destructor)BZ2File_dealloc, /*tp_dealloc*/
	0,			/*tp_print*/
	0,			/*tp_getattr*/
	0,			/*tp_setattr*/
	0,			/*tp_compare*/
	0,			/*tp_repr*/
	0,			/*tp_as_number*/
	0,			/*tp_as_sequence*/
	0,			/*tp_as_mapping*/
	0,			/*tp_hash*/
	0,			/*tp_call*/
	0,			/*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2File__doc__,		/*tp_doc*/
	0,			/*tp_traverse*/
	0,			/*tp_clear*/
	0,			/*tp_richcompare*/
	0,			/*tp_weaklistoffset*/
	(getiterfunc)BZ2File_getiter, /*tp_iter*/
	(iternextfunc)BZ2File_iternext, /*tp_iternext*/
	BZ2File_methods,	/*tp_methods*/
	BZ2File_members,	/*tp_members*/
	BZ2File_getset,		/*tp_getset*/
	0,			/*tp_base*/
	0,			/*tp_dict*/
	0,			/*tp_descr_get*/
	0,			/*tp_descr_set*/
	0,			/*tp_dictoffset*/
	(initproc)BZ2File_init,	/*tp_init*/
	PyType_GenericAlloc,	/*tp_alloc*/
	PyType_GenericNew,	/*tp_new*/
	_PyObject_Del,		/*tp_free*/
	0,			/*tp_is_gc*/
};
1498
1499
1500/* ===================================================================== */
1501/* Methods of BZ2Comp. */
1502
1503PyDoc_STRVAR(BZ2Comp_compress__doc__,
1504"compress(data) -> string\n\
1505\n\
1506Provide more data to the compressor object. It will return chunks of\n\
1507compressed data whenever possible. When you've finished providing data\n\
1508to compress, call the flush() method to finish the compression process,\n\
1509and return what is left in the internal buffers.\n\
1510");
1511
1512static PyObject *
1513BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1514{
1515 char *data;
1516 int datasize;
1517 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001518 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001519 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001520 bz_stream *bzs = &self->bzs;
1521 int bzerror;
1522
1523 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1524 return NULL;
1525
1526 ACQUIRE_LOCK(self);
1527 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001528 PyErr_SetString(PyExc_ValueError,
1529 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001530 goto error;
1531 }
1532
1533 ret = PyString_FromStringAndSize(NULL, bufsize);
1534 if (!ret)
1535 goto error;
1536
1537 bzs->next_in = data;
1538 bzs->avail_in = datasize;
1539 bzs->next_out = BUF(ret);
1540 bzs->avail_out = bufsize;
1541
1542 totalout = BZS_TOTAL_OUT(bzs);
1543
1544 for (;;) {
1545 Py_BEGIN_ALLOW_THREADS
1546 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1547 Py_END_ALLOW_THREADS
1548 if (bzerror != BZ_RUN_OK) {
1549 Util_CatchBZ2Error(bzerror);
1550 goto error;
1551 }
1552 if (bzs->avail_out == 0) {
1553 bufsize = Util_NewBufferSize(bufsize);
1554 if (_PyString_Resize(&ret, bufsize) < 0) {
1555 BZ2_bzCompressEnd(bzs);
1556 goto error;
1557 }
1558 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1559 - totalout);
1560 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1561 } else if (bzs->avail_in == 0) {
1562 break;
1563 }
1564 }
1565
Tim Petersf29f0c62002-11-09 04:28:17 +00001566 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001567
1568 RELEASE_LOCK(self);
1569 return ret;
1570
1571error:
1572 RELEASE_LOCK(self);
1573 Py_XDECREF(ret);
1574 return NULL;
1575}
1576
1577PyDoc_STRVAR(BZ2Comp_flush__doc__,
1578"flush() -> string\n\
1579\n\
1580Finish the compression process and return what is left in internal buffers.\n\
1581You must not use the compressor object after calling this method.\n\
1582");
1583
1584static PyObject *
1585BZ2Comp_flush(BZ2CompObject *self)
1586{
1587 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001588 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001589 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001590 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001591 int bzerror;
1592
1593 ACQUIRE_LOCK(self);
1594 if (!self->running) {
1595 PyErr_SetString(PyExc_ValueError, "object was already "
1596 "flushed");
1597 goto error;
1598 }
1599 self->running = 0;
1600
1601 ret = PyString_FromStringAndSize(NULL, bufsize);
1602 if (!ret)
1603 goto error;
1604
1605 bzs->next_out = BUF(ret);
1606 bzs->avail_out = bufsize;
1607
1608 totalout = BZS_TOTAL_OUT(bzs);
1609
1610 for (;;) {
1611 Py_BEGIN_ALLOW_THREADS
1612 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1613 Py_END_ALLOW_THREADS
1614 if (bzerror == BZ_STREAM_END) {
1615 break;
1616 } else if (bzerror != BZ_FINISH_OK) {
1617 Util_CatchBZ2Error(bzerror);
1618 goto error;
1619 }
1620 if (bzs->avail_out == 0) {
1621 bufsize = Util_NewBufferSize(bufsize);
1622 if (_PyString_Resize(&ret, bufsize) < 0)
1623 goto error;
1624 bzs->next_out = BUF(ret);
1625 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1626 - totalout);
1627 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1628 }
1629 }
1630
1631 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001632 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001633
1634 RELEASE_LOCK(self);
1635 return ret;
1636
1637error:
1638 RELEASE_LOCK(self);
1639 Py_XDECREF(ret);
1640 return NULL;
1641}
1642
/* Method table installed as tp_methods of BZ2Comp_Type. */
static PyMethodDef BZ2Comp_methods[] = {
	{"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
	 BZ2Comp_compress__doc__},
	{"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
	 BZ2Comp_flush__doc__},
	{NULL,		NULL}		/* sentinel */
};
1650
1651
1652/* ===================================================================== */
1653/* Slot definitions for BZ2Comp_Type. */
1654
1655static int
1656BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1657{
1658 int compresslevel = 9;
1659 int bzerror;
1660 static char *kwlist[] = {"compresslevel", 0};
1661
1662 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1663 kwlist, &compresslevel))
1664 return -1;
1665
1666 if (compresslevel < 1 || compresslevel > 9) {
1667 PyErr_SetString(PyExc_ValueError,
1668 "compresslevel must be between 1 and 9");
1669 goto error;
1670 }
1671
1672#ifdef WITH_THREAD
1673 self->lock = PyThread_allocate_lock();
1674 if (!self->lock)
1675 goto error;
1676#endif
1677
1678 memset(&self->bzs, 0, sizeof(bz_stream));
1679 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1680 if (bzerror != BZ_OK) {
1681 Util_CatchBZ2Error(bzerror);
1682 goto error;
1683 }
1684
1685 self->running = 1;
1686
1687 return 0;
1688error:
1689#ifdef WITH_THREAD
1690 if (self->lock)
1691 PyThread_free_lock(self->lock);
1692#endif
1693 return -1;
1694}
1695
1696static void
1697BZ2Comp_dealloc(BZ2CompObject *self)
1698{
1699#ifdef WITH_THREAD
1700 if (self->lock)
1701 PyThread_free_lock(self->lock);
1702#endif
1703 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001704 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001705}
1706
1707
1708/* ===================================================================== */
1709/* BZ2Comp_Type definition. */
1710
1711PyDoc_STRVAR(BZ2Comp__doc__,
1712"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1713\n\
1714Create a new compressor object. This object may be used to compress\n\
1715data sequentially. If you want to compress data in one shot, use the\n\
1716compress() function instead. The compresslevel parameter, if given,\n\
1717must be a number between 1 and 9.\n\
1718");
1719
/* Type object for bz2.BZ2Compressor.  Slots set to 0 are left
 * unimplemented; the type is subclassable (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2Comp_Type = {
	PyObject_HEAD_INIT(NULL)
	0,			/*ob_size*/
	"bz2.BZ2Compressor",	/*tp_name*/
	sizeof(BZ2CompObject),	/*tp_basicsize*/
	0,			/*tp_itemsize*/
	(destructor)BZ2Comp_dealloc, /*tp_dealloc*/
	0,			/*tp_print*/
	0,			/*tp_getattr*/
	0,			/*tp_setattr*/
	0,			/*tp_compare*/
	0,			/*tp_repr*/
	0,			/*tp_as_number*/
	0,			/*tp_as_sequence*/
	0,			/*tp_as_mapping*/
	0,			/*tp_hash*/
	0,			/*tp_call*/
	0,			/*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2Comp__doc__,		/*tp_doc*/
	0,			/*tp_traverse*/
	0,			/*tp_clear*/
	0,			/*tp_richcompare*/
	0,			/*tp_weaklistoffset*/
	0,			/*tp_iter*/
	0,			/*tp_iternext*/
	BZ2Comp_methods,	/*tp_methods*/
	0,			/*tp_members*/
	0,			/*tp_getset*/
	0,			/*tp_base*/
	0,			/*tp_dict*/
	0,			/*tp_descr_get*/
	0,			/*tp_descr_set*/
	0,			/*tp_dictoffset*/
	(initproc)BZ2Comp_init,	/*tp_init*/
	PyType_GenericAlloc,	/*tp_alloc*/
	PyType_GenericNew,	/*tp_new*/
	_PyObject_Del,		/*tp_free*/
	0,			/*tp_is_gc*/
};
1763
1764
1765/* ===================================================================== */
1766/* Members of BZ2Decomp. */
1767
/* OFF(x) now yields offsets inside BZ2DecompObject (it was previously
 * defined for BZ2FileObject). */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Struct fields exposed as Python attributes: "unused_data" is a
 * read-only (RO) object reference. */
static PyMemberDef BZ2Decomp_members[] = {
	{"unused_data", T_OBJECT, OFF(unused_data), RO},
	{NULL}	/* Sentinel */
};
1775
1776
1777/* ===================================================================== */
1778/* Methods of BZ2Decomp. */
1779
1780PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1781"decompress(data) -> string\n\
1782\n\
1783Provide more data to the decompressor object. It will return chunks\n\
1784of decompressed data whenever possible. If you try to decompress data\n\
1785after the end of stream is found, EOFError will be raised. If any data\n\
1786was found after the end of stream, it'll be ignored and saved in\n\
1787unused_data attribute.\n\
1788");
1789
1790static PyObject *
1791BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1792{
1793 char *data;
1794 int datasize;
1795 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001796 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001797 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001798 bz_stream *bzs = &self->bzs;
1799 int bzerror;
1800
1801 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1802 return NULL;
1803
1804 ACQUIRE_LOCK(self);
1805 if (!self->running) {
1806 PyErr_SetString(PyExc_EOFError, "end of stream was "
1807 "already found");
1808 goto error;
1809 }
1810
1811 ret = PyString_FromStringAndSize(NULL, bufsize);
1812 if (!ret)
1813 goto error;
1814
1815 bzs->next_in = data;
1816 bzs->avail_in = datasize;
1817 bzs->next_out = BUF(ret);
1818 bzs->avail_out = bufsize;
1819
1820 totalout = BZS_TOTAL_OUT(bzs);
1821
1822 for (;;) {
1823 Py_BEGIN_ALLOW_THREADS
1824 bzerror = BZ2_bzDecompress(bzs);
1825 Py_END_ALLOW_THREADS
1826 if (bzerror == BZ_STREAM_END) {
1827 if (bzs->avail_in != 0) {
1828 Py_DECREF(self->unused_data);
1829 self->unused_data =
1830 PyString_FromStringAndSize(bzs->next_in,
1831 bzs->avail_in);
1832 }
1833 self->running = 0;
1834 break;
1835 }
1836 if (bzerror != BZ_OK) {
1837 Util_CatchBZ2Error(bzerror);
1838 goto error;
1839 }
1840 if (bzs->avail_out == 0) {
1841 bufsize = Util_NewBufferSize(bufsize);
1842 if (_PyString_Resize(&ret, bufsize) < 0) {
1843 BZ2_bzDecompressEnd(bzs);
1844 goto error;
1845 }
1846 bzs->next_out = BUF(ret);
1847 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1848 - totalout);
1849 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1850 } else if (bzs->avail_in == 0) {
1851 break;
1852 }
1853 }
1854
1855 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001856 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001857
1858 RELEASE_LOCK(self);
1859 return ret;
1860
1861error:
1862 RELEASE_LOCK(self);
1863 Py_XDECREF(ret);
1864 return NULL;
1865}
1866
/* Method table installed as tp_methods of BZ2Decomp_Type. */
static PyMethodDef BZ2Decomp_methods[] = {
	{"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
	{NULL,		NULL}		/* sentinel */
};
1871
1872
1873/* ===================================================================== */
1874/* Slot definitions for BZ2Decomp_Type. */
1875
1876static int
1877BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1878{
1879 int bzerror;
1880
1881 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1882 return -1;
1883
1884#ifdef WITH_THREAD
1885 self->lock = PyThread_allocate_lock();
1886 if (!self->lock)
1887 goto error;
1888#endif
1889
1890 self->unused_data = PyString_FromString("");
1891 if (!self->unused_data)
1892 goto error;
1893
1894 memset(&self->bzs, 0, sizeof(bz_stream));
1895 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1896 if (bzerror != BZ_OK) {
1897 Util_CatchBZ2Error(bzerror);
1898 goto error;
1899 }
1900
1901 self->running = 1;
1902
1903 return 0;
1904
1905error:
1906#ifdef WITH_THREAD
1907 if (self->lock)
1908 PyThread_free_lock(self->lock);
1909#endif
1910 Py_XDECREF(self->unused_data);
1911 return -1;
1912}
1913
1914static void
1915BZ2Decomp_dealloc(BZ2DecompObject *self)
1916{
1917#ifdef WITH_THREAD
1918 if (self->lock)
1919 PyThread_free_lock(self->lock);
1920#endif
1921 Py_XDECREF(self->unused_data);
1922 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001923 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001924}
1925
1926
1927/* ===================================================================== */
1928/* BZ2Decomp_Type definition. */
1929
1930PyDoc_STRVAR(BZ2Decomp__doc__,
1931"BZ2Decompressor() -> decompressor object\n\
1932\n\
1933Create a new decompressor object. This object may be used to decompress\n\
1934data sequentially. If you want to decompress data in one shot, use the\n\
1935decompress() function instead.\n\
1936");
1937
/* Type object for bz2.BZ2Decompressor.  Slots set to 0 are left
 * unimplemented; the type is subclassable (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2Decomp_Type = {
	PyObject_HEAD_INIT(NULL)
	0,			/*ob_size*/
	"bz2.BZ2Decompressor",	/*tp_name*/
	sizeof(BZ2DecompObject), /*tp_basicsize*/
	0,			/*tp_itemsize*/
	(destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
	0,			/*tp_print*/
	0,			/*tp_getattr*/
	0,			/*tp_setattr*/
	0,			/*tp_compare*/
	0,			/*tp_repr*/
	0,			/*tp_as_number*/
	0,			/*tp_as_sequence*/
	0,			/*tp_as_mapping*/
	0,			/*tp_hash*/
	0,			/*tp_call*/
	0,			/*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2Decomp__doc__,	/*tp_doc*/
	0,			/*tp_traverse*/
	0,			/*tp_clear*/
	0,			/*tp_richcompare*/
	0,			/*tp_weaklistoffset*/
	0,			/*tp_iter*/
	0,			/*tp_iternext*/
	BZ2Decomp_methods,	/*tp_methods*/
	BZ2Decomp_members,	/*tp_members*/
	0,			/*tp_getset*/
	0,			/*tp_base*/
	0,			/*tp_dict*/
	0,			/*tp_descr_get*/
	0,			/*tp_descr_set*/
	0,			/*tp_dictoffset*/
	(initproc)BZ2Decomp_init, /*tp_init*/
	PyType_GenericAlloc,	/*tp_alloc*/
	PyType_GenericNew,	/*tp_new*/
	_PyObject_Del,		/*tp_free*/
	0,			/*tp_is_gc*/
};
1981
1982
1983/* ===================================================================== */
1984/* Module functions. */
1985
1986PyDoc_STRVAR(bz2_compress__doc__,
1987"compress(data [, compresslevel=9]) -> string\n\
1988\n\
1989Compress data in one shot. If you want to compress data sequentially,\n\
1990use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1991given, must be a number between 1 and 9.\n\
1992");
1993
1994static PyObject *
1995bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1996{
1997 int compresslevel=9;
1998 char *data;
1999 int datasize;
2000 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002001 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002002 bz_stream _bzs;
2003 bz_stream *bzs = &_bzs;
2004 int bzerror;
2005 static char *kwlist[] = {"data", "compresslevel", 0};
2006
2007 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2008 kwlist, &data, &datasize,
2009 &compresslevel))
2010 return NULL;
2011
2012 if (compresslevel < 1 || compresslevel > 9) {
2013 PyErr_SetString(PyExc_ValueError,
2014 "compresslevel must be between 1 and 9");
2015 return NULL;
2016 }
2017
2018 /* Conforming to bz2 manual, this is large enough to fit compressed
2019 * data in one shot. We will check it later anyway. */
2020 bufsize = datasize + (datasize/100+1) + 600;
2021
2022 ret = PyString_FromStringAndSize(NULL, bufsize);
2023 if (!ret)
2024 return NULL;
2025
2026 memset(bzs, 0, sizeof(bz_stream));
2027
2028 bzs->next_in = data;
2029 bzs->avail_in = datasize;
2030 bzs->next_out = BUF(ret);
2031 bzs->avail_out = bufsize;
2032
2033 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2034 if (bzerror != BZ_OK) {
2035 Util_CatchBZ2Error(bzerror);
2036 Py_DECREF(ret);
2037 return NULL;
2038 }
Tim Peterse3228092002-11-09 04:21:44 +00002039
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002040 for (;;) {
2041 Py_BEGIN_ALLOW_THREADS
2042 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2043 Py_END_ALLOW_THREADS
2044 if (bzerror == BZ_STREAM_END) {
2045 break;
2046 } else if (bzerror != BZ_FINISH_OK) {
2047 BZ2_bzCompressEnd(bzs);
2048 Util_CatchBZ2Error(bzerror);
2049 Py_DECREF(ret);
2050 return NULL;
2051 }
2052 if (bzs->avail_out == 0) {
2053 bufsize = Util_NewBufferSize(bufsize);
2054 if (_PyString_Resize(&ret, bufsize) < 0) {
2055 BZ2_bzCompressEnd(bzs);
2056 Py_DECREF(ret);
2057 return NULL;
2058 }
2059 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2060 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2061 }
2062 }
2063
2064 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002065 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002066 BZ2_bzCompressEnd(bzs);
2067
2068 return ret;
2069}
2070
2071PyDoc_STRVAR(bz2_decompress__doc__,
2072"decompress(data) -> decompressed data\n\
2073\n\
2074Decompress data in one shot. If you want to decompress data sequentially,\n\
2075use an instance of BZ2Decompressor instead.\n\
2076");
2077
2078static PyObject *
2079bz2_decompress(PyObject *self, PyObject *args)
2080{
2081 char *data;
2082 int datasize;
2083 int bufsize = SMALLCHUNK;
2084 PyObject *ret;
2085 bz_stream _bzs;
2086 bz_stream *bzs = &_bzs;
2087 int bzerror;
2088
2089 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2090 return NULL;
2091
2092 if (datasize == 0)
2093 return PyString_FromString("");
2094
2095 ret = PyString_FromStringAndSize(NULL, bufsize);
2096 if (!ret)
2097 return NULL;
2098
2099 memset(bzs, 0, sizeof(bz_stream));
2100
2101 bzs->next_in = data;
2102 bzs->avail_in = datasize;
2103 bzs->next_out = BUF(ret);
2104 bzs->avail_out = bufsize;
2105
2106 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2107 if (bzerror != BZ_OK) {
2108 Util_CatchBZ2Error(bzerror);
2109 Py_DECREF(ret);
2110 return NULL;
2111 }
Tim Peterse3228092002-11-09 04:21:44 +00002112
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002113 for (;;) {
2114 Py_BEGIN_ALLOW_THREADS
2115 bzerror = BZ2_bzDecompress(bzs);
2116 Py_END_ALLOW_THREADS
2117 if (bzerror == BZ_STREAM_END) {
2118 break;
2119 } else if (bzerror != BZ_OK) {
2120 BZ2_bzDecompressEnd(bzs);
2121 Util_CatchBZ2Error(bzerror);
2122 Py_DECREF(ret);
2123 return NULL;
2124 }
2125 if (bzs->avail_out == 0) {
2126 bufsize = Util_NewBufferSize(bufsize);
2127 if (_PyString_Resize(&ret, bufsize) < 0) {
2128 BZ2_bzDecompressEnd(bzs);
2129 Py_DECREF(ret);
2130 return NULL;
2131 }
2132 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2133 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2134 } else if (bzs->avail_in == 0) {
2135 BZ2_bzDecompressEnd(bzs);
2136 PyErr_SetString(PyExc_ValueError,
2137 "couldn't find end of stream");
2138 Py_DECREF(ret);
2139 return NULL;
2140 }
2141 }
2142
2143 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002144 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002145 BZ2_bzDecompressEnd(bzs);
2146
2147 return ret;
2148}
2149
2150static PyMethodDef bz2_methods[] = {
2151 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2152 bz2_compress__doc__},
2153 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2154 bz2_decompress__doc__},
2155 {NULL, NULL} /* sentinel */
2156};
2157
2158/* ===================================================================== */
2159/* Initialization function. */
2160
2161PyDoc_STRVAR(bz2__doc__,
2162"The python bz2 module provides a comprehensive interface for\n\
2163the bz2 compression library. It implements a complete file\n\
2164interface, one shot (de)compression functions, and types for\n\
2165sequential (de)compression.\n\
2166");
2167
2168DL_EXPORT(void)
2169initbz2(void)
2170{
2171 PyObject *m;
2172
2173 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002174 BZ2Comp_Type.ob_type = &PyType_Type;
2175 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002176
2177 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2178
2179 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2180
2181 Py_INCREF(&BZ2File_Type);
2182 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2183
2184 Py_INCREF(&BZ2Comp_Type);
2185 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2186
2187 Py_INCREF(&BZ2Decomp_Type);
2188 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2189}