/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Character data of a Python string object.  The argument is
 * parenthesized so that expressions such as BUF(a + b) cast the whole
 * expression rather than only its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2  /* reading; the end of the stream has been reached */
#define MODE_WRITE    3

/* True when v's type is exactly BZ2File_Type. */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
33
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000034
#ifdef BZ_CONFIG_ERROR

/* The bz_stream output counter is split into two 32-bit halves here.
 * Combine them when a 64-bit integer type is available, otherwise
 * fall back to the low half only.  The expansion is a plain
 * parenthesized expression (no trailing semicolon) so that it can be
 * used inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Without BZ_CONFIG_ERROR the library entry points are reached through
 * their unprefixed names (presumably an older libbz2 -- confirm). */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
66
67
/* Per-object locking.  Both macros expand to nothing when Python is
 * built without thread support.  The argument is parenthesized so any
 * pointer expression can be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

#ifdef WITH_UNIVERSAL_NEWLINES
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0   /* No newline seen, yet */
#define NEWLINE_CR      1   /* \r newline seen */
#define NEWLINE_LF      2   /* \n newline seen */
#define NEWLINE_CRLF    4   /* \r\n newline seen */
#endif
83
84/* ===================================================================== */
85/* Structure definitions. */
86
87typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000088 PyObject_HEAD
89 PyObject *file;
90
91 char* f_buf; /* Allocated readahead buffer */
92 char* f_bufend; /* Points after last occupied position */
93 char* f_bufptr; /* Current buffer position */
94
95 int f_softspace; /* Flag used by 'print' command */
96
97#ifdef WITH_UNIVERSAL_NEWLINES
98 int f_univ_newline; /* Handle any newline convention */
99 int f_newlinetypes; /* Types of newlines seen */
100 int f_skipnextlf; /* Skip next \n */
101#endif
102
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000103 BZFILE *fp;
104 int mode;
105 long pos;
106 long size;
107#ifdef WITH_THREAD
108 PyThread_type_lock lock;
109#endif
110} BZ2FileObject;
111
112typedef struct {
113 PyObject_HEAD
114 bz_stream bzs;
115 int running;
116#ifdef WITH_THREAD
117 PyThread_type_lock lock;
118#endif
119} BZ2CompObject;
120
121typedef struct {
122 PyObject_HEAD
123 bz_stream bzs;
124 int running;
125 PyObject *unused_data;
126#ifdef WITH_THREAD
127 PyThread_type_lock lock;
128#endif
129} BZ2DecompObject;
130
131/* ===================================================================== */
132/* Utility functions. */
133
134static int
135Util_CatchBZ2Error(int bzerror)
136{
137 int ret = 0;
138 switch(bzerror) {
139 case BZ_OK:
140 case BZ_STREAM_END:
141 break;
142
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000143#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000144 case BZ_CONFIG_ERROR:
145 PyErr_SetString(PyExc_SystemError,
146 "the bz2 library was not compiled "
147 "correctly");
148 ret = 1;
149 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000150#endif
Tim Peterse3228092002-11-09 04:21:44 +0000151
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_PARAM_ERROR:
153 PyErr_SetString(PyExc_ValueError,
154 "the bz2 library has received wrong "
155 "parameters");
156 ret = 1;
157 break;
Tim Peterse3228092002-11-09 04:21:44 +0000158
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000159 case BZ_MEM_ERROR:
160 PyErr_NoMemory();
161 ret = 1;
162 break;
163
164 case BZ_DATA_ERROR:
165 case BZ_DATA_ERROR_MAGIC:
166 PyErr_SetString(PyExc_IOError, "invalid data stream");
167 ret = 1;
168 break;
169
170 case BZ_IO_ERROR:
171 PyErr_SetString(PyExc_IOError, "unknown IO error");
172 ret = 1;
173 break;
174
175 case BZ_UNEXPECTED_EOF:
176 PyErr_SetString(PyExc_EOFError,
177 "compressed file ended before the "
178 "logical end-of-stream was detected");
179 ret = 1;
180 break;
181
182 case BZ_SEQUENCE_ERROR:
183 PyErr_SetString(PyExc_RuntimeError,
184 "wrong sequence of bz2 library "
185 "commands used");
186 ret = 1;
187 break;
188 }
189 return ret;
190}
191
/* Growth step for small buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Size above which buffers grow linearly instead of doubling. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to try after `currentsize`:
 *   - up to SMALLCHUNK:            grow by SMALLCHUNK;
 *   - above that, up to BIGCHUNK:  double;
 *   - beyond BIGCHUNK:             grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
218
219/* This is a hacked version of Python's fileobject.c:get_line(). */
220static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000221Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000222{
223 char c;
224 char *buf, *end;
225 size_t total_v_size; /* total # of slots in buffer */
226 size_t used_v_size; /* # used slots in buffer */
227 size_t increment; /* amount to increment the buffer */
228 PyObject *v;
229 int bzerror;
230#ifdef WITH_UNIVERSAL_NEWLINES
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000231 int newlinetypes = f->f_newlinetypes;
232 int skipnextlf = f->f_skipnextlf;
233 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000234#endif
235
236 total_v_size = n > 0 ? n : 100;
237 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
238 if (v == NULL)
239 return NULL;
240
241 buf = BUF(v);
242 end = buf + total_v_size;
243
244 for (;;) {
245 Py_BEGIN_ALLOW_THREADS
246#ifdef WITH_UNIVERSAL_NEWLINES
247 if (univ_newline) {
248 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000249 BZ2_bzRead(&bzerror, f->fp, &c, 1);
250 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251 if (bzerror != BZ_OK || buf == end)
252 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000253 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000254 skipnextlf = 0;
255 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000256 /* Seeing a \n here with
257 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000258 * saw a \r before.
259 */
260 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000261 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262 &c, 1);
263 if (bzerror != BZ_OK)
264 break;
265 } else {
266 newlinetypes |= NEWLINE_CR;
267 }
268 }
269 if (c == '\r') {
270 skipnextlf = 1;
271 c = '\n';
272 } else if ( c == '\n')
273 newlinetypes |= NEWLINE_LF;
274 *buf++ = c;
275 if (c == '\n') break;
276 }
277 if (bzerror == BZ_STREAM_END && skipnextlf)
278 newlinetypes |= NEWLINE_CR;
279 } else /* If not universal newlines use the normal loop */
280#endif
281 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000282 BZ2_bzRead(&bzerror, f->fp, &c, 1);
283 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 *buf++ = c;
285 } while (bzerror == BZ_OK && c != '\n' && buf != end);
286 Py_END_ALLOW_THREADS
287#ifdef WITH_UNIVERSAL_NEWLINES
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000288 f->f_newlinetypes = newlinetypes;
289 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000290#endif
291 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000292 f->size = f->pos;
293 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294 break;
295 } else if (bzerror != BZ_OK) {
296 Util_CatchBZ2Error(bzerror);
297 Py_DECREF(v);
298 return NULL;
299 }
300 if (c == '\n')
301 break;
302 /* Must be because buf == end */
303 if (n > 0)
304 break;
305 used_v_size = total_v_size;
306 increment = total_v_size >> 2; /* mild exponential growth */
307 total_v_size += increment;
308 if (total_v_size > INT_MAX) {
309 PyErr_SetString(PyExc_OverflowError,
310 "line is longer than a Python string can hold");
311 Py_DECREF(v);
312 return NULL;
313 }
314 if (_PyString_Resize(&v, total_v_size) < 0)
315 return NULL;
316 buf = BUF(v) + used_v_size;
317 end = BUF(v) + total_v_size;
318 }
319
320 used_v_size = buf - BUF(v);
321 if (used_v_size != total_v_size)
322 _PyString_Resize(&v, used_v_size);
323 return v;
324}
325
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* This is a hacked version of Python's
 * fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n bytes from stream into buf and return the number of
 * bytes stored.  When f->f_univ_newline is set, "\r" and "\r\n" are
 * stored as "\n" and the newline kinds seen are recorded in
 * f->f_newlinetypes; otherwise the call is passed straight to
 * BZ2_bzRead().  *bzerror receives the libbz2 status of the last read.
 */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
                     char* buf, size_t n, BZ2FileObject *f)
{
    char *dst = buf;
    int newlinetypes, skipnextlf;

    assert(buf != NULL);
    assert(stream != NULL);

    if (!f->f_univ_newline)
        return BZ2_bzRead(bzerror, stream, buf, n);

    /* Callers test *bzerror unconditionally, so give it a defined
     * value even when n == 0 and the loop below never runs. */
    *bzerror = BZ_OK;

    newlinetypes = f->f_newlinetypes;
    skipnextlf = f->f_skipnextlf;

    /* Invariant:  n is the number of bytes remaining to be filled
     * in the buffer.
     */
    while (n) {
        size_t nread;
        int shortread;
        char *src = dst;

        nread = BZ2_bzRead(bzerror, stream, dst, n);
        assert(nread <= n);
        n -= nread; /* assuming 1 byte out for each in; will adjust */
        /* True iff EOF or error.  The status is checked as well as
         * the count: a read can fill the buffer exactly and still
         * report BZ_STREAM_END, and collapsing a "\r\n" would then
         * make n non-zero again and trigger another read on a
         * finished stream. */
        shortread = (n != 0) || (*bzerror != BZ_OK);
        while (nread--) {
            char c = *src++;
            if (c == '\r') {
                /* Save as LF and set flag to skip next LF. */
                *dst++ = '\n';
                skipnextlf = 1;
            }
            else if (skipnextlf && c == '\n') {
                /* Skip LF, and remember we saw CR LF. */
                skipnextlf = 0;
                newlinetypes |= NEWLINE_CRLF;
                ++n;
            }
            else {
                /* Normal char to be stored in buffer.  Also
                 * update the newlinetypes flag if either this
                 * is an LF or the previous char was a CR.
                 */
                if (c == '\n')
                    newlinetypes |= NEWLINE_LF;
                else if (skipnextlf)
                    newlinetypes |= NEWLINE_CR;
                *dst++ = c;
                skipnextlf = 0;
            }
        }
        if (shortread) {
            /* If this is EOF, update type flags. */
            if (skipnextlf && *bzerror == BZ_STREAM_END)
                newlinetypes |= NEWLINE_CR;
            break;
        }
    }
    f->f_newlinetypes = newlinetypes;
    f->f_skipnextlf = skipnextlf;
    return dst - buf;
}
#endif
397
398/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
399static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000400Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000401{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000402 if (f->f_buf != NULL) {
403 PyMem_Free(f->f_buf);
404 f->f_buf = NULL;
405 }
406}
407
408/* This is a hacked version of Python's fileobject.c:readahead(). */
409static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000410Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000411{
412 int chunksize;
413 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000414
415 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000416 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000417 return 0;
418 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000420 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000421 if (f->mode == MODE_READ_EOF) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000422 return -1;
423 }
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
699 if (p == NULL) {
700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
705 "line is longer than a Python string can hold");
706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
722 continue;
723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
726 do {
727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
738 } while (p != NULL);
739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000781PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782"xreadlines() -> self\n\
783\n\
784For backward compatibility. BZ2File objects now include the performance\n\
785optimizations previously implemented in the xreadlines module.\n\
786");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
803
804 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
805 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000806
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
Tim Peterse3228092002-11-09 04:21:44 +0000811
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000816
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
821 }
822
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000823 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000829
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
833 }
Tim Peterse3228092002-11-09 04:21:44 +0000834
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000835 Py_INCREF(Py_None);
836 ret = Py_None;
837
838cleanup:
839 RELEASE_LOCK(self);
840 return ret;
841}
842
843PyDoc_STRVAR(BZ2File_writelines__doc__,
844"writelines(sequence_of_strings) -> None\n\
845\n\
846Write the sequence of strings to the file. Note that newlines are not\n\
847added. The sequence can be any iterable object producing strings. This is\n\
848equivalent to calling write() for each string.\n\
849");
850
851/* This is a hacked version of Python's fileobject.c:file_writelines(). */
852static PyObject *
853BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
854{
855#define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
862
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
871 }
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
875 }
876
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
887 }
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
895 }
896 PyList_SetItem(list, j, line);
897 }
898 }
899 if (j == 0)
900 break;
901
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
911 int len;
912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
919 }
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
926 }
927 }
928
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000929 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000930
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
943 }
944 }
945 Py_END_ALLOW_THREADS
946
947 if (j < CHUNKSIZE)
948 break;
949 }
950
951 Py_INCREF(Py_None);
952 ret = Py_None;
953
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959#undef CHUNKSIZE
960}
961
962PyDoc_STRVAR(BZ2File_seek__doc__,
963"seek(offset [, whence]) -> None\n\
964\n\
965Move to new file position. Argument offset is a byte count. Optional\n\
966argument whence defaults to 0 (offset from start of file, offset\n\
967should be >= 0); other values are 1 (move relative to current position,\n\
968positive or negative), and 2 (move relative to end of file, usually\n\
969negative, although many platforms allow seeking beyond the end of a file).\n\
970\n\
971Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972the operation may be extremely slow.\n\
973");
974
975static PyObject *
976BZ2File_seek(BZ2FileObject *self, PyObject *args)
977{
978 int where = 0;
979 long offset;
980 char small_buffer[SMALLCHUNK];
981 char *buffer = small_buffer;
982 size_t buffersize = SMALLCHUNK;
983 int bytesread = 0;
984 int readsize;
985 int chunksize;
986 int bzerror;
987 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000988 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000989
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000990 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
991 return NULL;
992
993 ACQUIRE_LOCK(self);
994 Util_DropReadAhead(self);
995 switch (self->mode) {
996 case MODE_READ:
997 case MODE_READ_EOF:
998 break;
Tim Peterse3228092002-11-09 04:21:44 +0000999
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 case MODE_CLOSED:
1001 PyErr_SetString(PyExc_ValueError,
1002 "I/O operation on closed file");
1003 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +00001004
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001005 default:
1006 PyErr_SetString(PyExc_IOError,
1007 "seek works only while reading");
1008 goto cleanup;;
1009 }
1010
1011 if (offset < 0) {
1012 if (where == 1) {
1013 offset = self->pos + offset;
1014 rewind = 1;
1015 } else if (where == 2) {
1016 if (self->size == -1) {
1017 assert(self->mode != MODE_READ_EOF);
1018 for (;;) {
1019 Py_BEGIN_ALLOW_THREADS
1020 chunksize = Util_UnivNewlineRead(
1021 &bzerror, self->fp,
1022 buffer, buffersize,
1023 self);
1024 self->pos += chunksize;
1025 Py_END_ALLOW_THREADS
1026
1027 bytesread += chunksize;
1028 if (bzerror == BZ_STREAM_END) {
1029 break;
1030 } else if (bzerror != BZ_OK) {
1031 Util_CatchBZ2Error(bzerror);
1032 goto cleanup;
1033 }
1034 }
1035 self->mode = MODE_READ_EOF;
1036 self->size = self->pos;
1037 bytesread = 0;
1038 }
1039 offset = self->size + offset;
1040 if (offset >= self->pos)
1041 offset -= self->pos;
1042 else
1043 rewind = 1;
1044 }
1045 if (offset < 0)
1046 offset = 0;
1047 } else if (where == 0) {
1048 if (offset >= self->pos)
1049 offset -= self->pos;
1050 else
1051 rewind = 1;
1052 }
1053
1054 if (rewind) {
1055 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 if (bzerror != BZ_OK) {
1057 Util_CatchBZ2Error(bzerror);
1058 goto cleanup;
1059 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001060 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001061 if (!ret)
1062 goto cleanup;
1063 Py_DECREF(ret);
1064 ret = NULL;
1065 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001066 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001067 0, 0, NULL, 0);
1068 if (bzerror != BZ_OK) {
1069 Util_CatchBZ2Error(bzerror);
1070 goto cleanup;
1071 }
1072 self->mode = MODE_READ;
1073 } else if (self->mode == MODE_READ_EOF) {
1074 goto exit;
1075 }
1076
1077 if (offset == 0)
1078 goto exit;
1079
1080 /* Before getting here, offset must be set to the number of bytes
1081 * to walk forward. */
1082 for (;;) {
Tim Petersa17c0c42002-11-09 04:23:31 +00001083 if ((size_t)offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001084 readsize = buffersize;
1085 else
1086 readsize = offset-bytesread;
1087 Py_BEGIN_ALLOW_THREADS
1088 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1089 buffer, readsize, self);
1090 self->pos += chunksize;
1091 Py_END_ALLOW_THREADS
1092 bytesread += chunksize;
1093 if (bzerror == BZ_STREAM_END) {
1094 self->size = self->pos;
1095 self->mode = MODE_READ_EOF;
1096 break;
1097 } else if (bzerror != BZ_OK) {
1098 Util_CatchBZ2Error(bzerror);
1099 goto cleanup;
1100 }
1101 if (bytesread == offset)
1102 break;
1103 }
1104
1105exit:
1106 Py_INCREF(Py_None);
1107 ret = Py_None;
1108
1109cleanup:
1110 RELEASE_LOCK(self);
1111 return ret;
1112}
1113
1114PyDoc_STRVAR(BZ2File_tell__doc__,
1115"tell() -> int\n\
1116\n\
1117Return the current file position, an integer (may be a long integer).\n\
1118");
1119
1120static PyObject *
1121BZ2File_tell(BZ2FileObject *self, PyObject *args)
1122{
1123 PyObject *ret = NULL;
1124
1125 if (self->mode == MODE_CLOSED) {
1126 PyErr_SetString(PyExc_ValueError,
1127 "I/O operation on closed file");
1128 goto cleanup;
1129 }
1130
1131 ret = PyInt_FromLong(self->pos);
1132
1133cleanup:
1134 return ret;
1135}
1136
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001137PyDoc_STRVAR(BZ2File_close__doc__,
1138"close() -> None or (perhaps) an integer\n\
1139\n\
1140Close the file. Sets data attribute .closed to true. A closed file\n\
1141cannot be used for further I/O operations. close() may be called more\n\
1142than once without error.\n\
1143");
1144
1145static PyObject *
1146BZ2File_close(BZ2FileObject *self)
1147{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001148 PyObject *ret = NULL;
1149 int bzerror = BZ_OK;
1150
1151 ACQUIRE_LOCK(self);
1152 switch (self->mode) {
1153 case MODE_READ:
1154 case MODE_READ_EOF:
1155 BZ2_bzReadClose(&bzerror, self->fp);
1156 break;
1157 case MODE_WRITE:
1158 BZ2_bzWriteClose(&bzerror, self->fp,
1159 0, NULL, NULL);
1160 break;
1161 }
1162 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001163 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001164 if (bzerror != BZ_OK) {
1165 Util_CatchBZ2Error(bzerror);
1166 Py_XDECREF(ret);
1167 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001168 }
1169
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001170 RELEASE_LOCK(self);
1171 return ret;
1172}
1173
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001174static PyObject *BZ2File_getiter(BZ2FileObject *self);
1175
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001176static PyMethodDef BZ2File_methods[] = {
1177 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1178 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1179 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001180 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001181 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1182 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1183 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1184 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001185 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1186 {NULL, NULL} /* sentinel */
1187};
1188
1189
1190/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001191/* Getters and setters of BZ2File. */
1192
1193#ifdef WITH_UNIVERSAL_NEWLINES
1194/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1195static PyObject *
1196BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1197{
1198 switch (self->f_newlinetypes) {
1199 case NEWLINE_UNKNOWN:
1200 Py_INCREF(Py_None);
1201 return Py_None;
1202 case NEWLINE_CR:
1203 return PyString_FromString("\r");
1204 case NEWLINE_LF:
1205 return PyString_FromString("\n");
1206 case NEWLINE_CR|NEWLINE_LF:
1207 return Py_BuildValue("(ss)", "\r", "\n");
1208 case NEWLINE_CRLF:
1209 return PyString_FromString("\r\n");
1210 case NEWLINE_CR|NEWLINE_CRLF:
1211 return Py_BuildValue("(ss)", "\r", "\r\n");
1212 case NEWLINE_LF|NEWLINE_CRLF:
1213 return Py_BuildValue("(ss)", "\n", "\r\n");
1214 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1215 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1216 default:
1217 PyErr_Format(PyExc_SystemError,
1218 "Unknown newlines value 0x%x\n",
1219 self->f_newlinetypes);
1220 return NULL;
1221 }
1222}
1223#endif
1224
1225static PyObject *
1226BZ2File_get_closed(BZ2FileObject *self, void *closure)
1227{
1228 return PyInt_FromLong(self->mode == MODE_CLOSED);
1229}
1230
1231static PyObject *
1232BZ2File_get_mode(BZ2FileObject *self, void *closure)
1233{
1234 return PyObject_GetAttrString(self->file, "mode");
1235}
1236
1237static PyObject *
1238BZ2File_get_name(BZ2FileObject *self, void *closure)
1239{
1240 return PyObject_GetAttrString(self->file, "name");
1241}
1242
1243static PyGetSetDef BZ2File_getset[] = {
1244 {"closed", (getter)BZ2File_get_closed, NULL,
1245 "True if the file is closed"},
1246#ifdef WITH_UNIVERSAL_NEWLINES
1247 {"newlines", (getter)BZ2File_get_newlines, NULL,
1248 "end-of-line convention used in this file"},
1249#endif
1250 {"mode", (getter)BZ2File_get_mode, NULL,
1251 "file mode ('r', 'w', or 'U')"},
1252 {"name", (getter)BZ2File_get_name, NULL,
1253 "file name"},
1254 {NULL} /* Sentinel */
1255};
1256
1257
1258/* ===================================================================== */
1259/* Members of BZ2File_Type. */
1260
1261#undef OFF
1262#define OFF(x) offsetof(BZ2FileObject, x)
1263
1264static PyMemberDef BZ2File_members[] = {
1265 {"softspace", T_INT, OFF(f_softspace), 0,
1266 "flag indicating that a space needs to be printed; used by print"},
1267 {NULL} /* Sentinel */
1268};
1269
1270/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001271/* Slot definitions for BZ2File_Type. */
1272
1273static int
1274BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1275{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001276 static char *kwlist[] = {"filename", "mode", "buffering",
1277 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001278 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001279 char *mode = "r";
1280 int buffering = -1;
1281 int compresslevel = 9;
1282 int bzerror;
1283 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001284
1285 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001286
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001287 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1288 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001289 &compresslevel))
1290 return -1;
1291
1292 if (compresslevel < 1 || compresslevel > 9) {
1293 PyErr_SetString(PyExc_ValueError,
1294 "compresslevel must be between 1 and 9");
1295 return -1;
1296 }
1297
1298 for (;;) {
1299 int error = 0;
1300 switch (*mode) {
1301 case 'r':
1302 case 'w':
1303 if (mode_char)
1304 error = 1;
1305 mode_char = *mode;
1306 break;
1307
1308 case 'b':
1309 break;
1310
1311 case 'U':
Martin v. Löwisc16f3bd2003-05-03 09:14:54 +00001312#ifdef WITH_UNIVERSAL_NEWLINES
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001313 self->f_univ_newline = 1;
Martin v. Löwisc16f3bd2003-05-03 09:14:54 +00001314#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001315 break;
1316
1317 default:
1318 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001319 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001320 }
1321 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001322 PyErr_Format(PyExc_ValueError,
1323 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001324 return -1;
1325 }
1326 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001327 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001328 break;
1329 }
1330
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001331 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001332
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001333 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1334 name, mode, buffering);
1335 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001336 return -1;
1337
1338 /* From now on, we have stuff to dealloc, so jump to error label
1339 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001340
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001341#ifdef WITH_THREAD
1342 self->lock = PyThread_allocate_lock();
1343 if (!self->lock)
1344 goto error;
1345#endif
1346
1347 if (mode_char == 'r')
1348 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001349 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001350 0, 0, NULL, 0);
1351 else
1352 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001353 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001354 compresslevel, 0, 0);
1355
1356 if (bzerror != BZ_OK) {
1357 Util_CatchBZ2Error(bzerror);
1358 goto error;
1359 }
1360
1361 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1362
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001363 return 0;
1364
1365error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001366 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001367#ifdef WITH_THREAD
1368 if (self->lock)
1369 PyThread_free_lock(self->lock);
1370#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001371 return -1;
1372}
1373
1374static void
1375BZ2File_dealloc(BZ2FileObject *self)
1376{
1377 int bzerror;
1378#ifdef WITH_THREAD
1379 if (self->lock)
1380 PyThread_free_lock(self->lock);
1381#endif
1382 switch (self->mode) {
1383 case MODE_READ:
1384 case MODE_READ_EOF:
1385 BZ2_bzReadClose(&bzerror, self->fp);
1386 break;
1387 case MODE_WRITE:
1388 BZ2_bzWriteClose(&bzerror, self->fp,
1389 0, NULL, NULL);
1390 break;
1391 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001392 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001393 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001394 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001395}
1396
1397/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1398static PyObject *
1399BZ2File_getiter(BZ2FileObject *self)
1400{
1401 if (self->mode == MODE_CLOSED) {
1402 PyErr_SetString(PyExc_ValueError,
1403 "I/O operation on closed file");
1404 return NULL;
1405 }
1406 Py_INCREF((PyObject*)self);
1407 return (PyObject *)self;
1408}
1409
1410/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1411#define READAHEAD_BUFSIZE 8192
1412static PyObject *
1413BZ2File_iternext(BZ2FileObject *self)
1414{
1415 PyStringObject* ret;
1416 ACQUIRE_LOCK(self);
1417 if (self->mode == MODE_CLOSED) {
1418 PyErr_SetString(PyExc_ValueError,
1419 "I/O operation on closed file");
1420 return NULL;
1421 }
1422 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1423 RELEASE_LOCK(self);
1424 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1425 Py_XDECREF(ret);
1426 return NULL;
1427 }
1428 return (PyObject *)ret;
1429}
1430
1431/* ===================================================================== */
1432/* BZ2File_Type definition. */
1433
1434PyDoc_VAR(BZ2File__doc__) =
1435PyDoc_STR(
1436"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1437\n\
1438Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1439writing. When opened for writing, the file will be created if it doesn't\n\
1440exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1441unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1442is given, must be a number between 1 and 9.\n\
1443")
1444#ifdef WITH_UNIVERSAL_NEWLINES
1445PyDoc_STR(
1446"\n\
1447Add a 'U' to mode to open the file for input with universal newline\n\
1448support. Any line ending in the input file will be seen as a '\\n' in\n\
1449Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1450for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1451'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1452newlines are available only when reading.\n\
1453")
1454#endif
1455;
1456
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001457static PyTypeObject BZ2File_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001458 PyObject_HEAD_INIT(NULL)
1459 0, /*ob_size*/
1460 "bz2.BZ2File", /*tp_name*/
1461 sizeof(BZ2FileObject), /*tp_basicsize*/
1462 0, /*tp_itemsize*/
1463 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1464 0, /*tp_print*/
1465 0, /*tp_getattr*/
1466 0, /*tp_setattr*/
1467 0, /*tp_compare*/
1468 0, /*tp_repr*/
1469 0, /*tp_as_number*/
1470 0, /*tp_as_sequence*/
1471 0, /*tp_as_mapping*/
1472 0, /*tp_hash*/
1473 0, /*tp_call*/
1474 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001475 PyObject_GenericGetAttr,/*tp_getattro*/
1476 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001477 0, /*tp_as_buffer*/
1478 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1479 BZ2File__doc__, /*tp_doc*/
1480 0, /*tp_traverse*/
1481 0, /*tp_clear*/
1482 0, /*tp_richcompare*/
1483 0, /*tp_weaklistoffset*/
1484 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1485 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1486 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001487 BZ2File_members, /*tp_members*/
1488 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001489 0, /*tp_base*/
1490 0, /*tp_dict*/
1491 0, /*tp_descr_get*/
1492 0, /*tp_descr_set*/
1493 0, /*tp_dictoffset*/
1494 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001495 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001496 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001497 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001498 0, /*tp_is_gc*/
1499};
1500
1501
1502/* ===================================================================== */
1503/* Methods of BZ2Comp. */
1504
1505PyDoc_STRVAR(BZ2Comp_compress__doc__,
1506"compress(data) -> string\n\
1507\n\
1508Provide more data to the compressor object. It will return chunks of\n\
1509compressed data whenever possible. When you've finished providing data\n\
1510to compress, call the flush() method to finish the compression process,\n\
1511and return what is left in the internal buffers.\n\
1512");
1513
1514static PyObject *
1515BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1516{
1517 char *data;
1518 int datasize;
1519 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001520 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001521 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001522 bz_stream *bzs = &self->bzs;
1523 int bzerror;
1524
1525 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1526 return NULL;
1527
1528 ACQUIRE_LOCK(self);
1529 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001530 PyErr_SetString(PyExc_ValueError,
1531 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001532 goto error;
1533 }
1534
1535 ret = PyString_FromStringAndSize(NULL, bufsize);
1536 if (!ret)
1537 goto error;
1538
1539 bzs->next_in = data;
1540 bzs->avail_in = datasize;
1541 bzs->next_out = BUF(ret);
1542 bzs->avail_out = bufsize;
1543
1544 totalout = BZS_TOTAL_OUT(bzs);
1545
1546 for (;;) {
1547 Py_BEGIN_ALLOW_THREADS
1548 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1549 Py_END_ALLOW_THREADS
1550 if (bzerror != BZ_RUN_OK) {
1551 Util_CatchBZ2Error(bzerror);
1552 goto error;
1553 }
1554 if (bzs->avail_out == 0) {
1555 bufsize = Util_NewBufferSize(bufsize);
1556 if (_PyString_Resize(&ret, bufsize) < 0) {
1557 BZ2_bzCompressEnd(bzs);
1558 goto error;
1559 }
1560 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1561 - totalout);
1562 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1563 } else if (bzs->avail_in == 0) {
1564 break;
1565 }
1566 }
1567
Tim Petersf29f0c62002-11-09 04:28:17 +00001568 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001569
1570 RELEASE_LOCK(self);
1571 return ret;
1572
1573error:
1574 RELEASE_LOCK(self);
1575 Py_XDECREF(ret);
1576 return NULL;
1577}
1578
1579PyDoc_STRVAR(BZ2Comp_flush__doc__,
1580"flush() -> string\n\
1581\n\
1582Finish the compression process and return what is left in internal buffers.\n\
1583You must not use the compressor object after calling this method.\n\
1584");
1585
1586static PyObject *
1587BZ2Comp_flush(BZ2CompObject *self)
1588{
1589 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001590 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001591 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001592 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001593 int bzerror;
1594
1595 ACQUIRE_LOCK(self);
1596 if (!self->running) {
1597 PyErr_SetString(PyExc_ValueError, "object was already "
1598 "flushed");
1599 goto error;
1600 }
1601 self->running = 0;
1602
1603 ret = PyString_FromStringAndSize(NULL, bufsize);
1604 if (!ret)
1605 goto error;
1606
1607 bzs->next_out = BUF(ret);
1608 bzs->avail_out = bufsize;
1609
1610 totalout = BZS_TOTAL_OUT(bzs);
1611
1612 for (;;) {
1613 Py_BEGIN_ALLOW_THREADS
1614 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1615 Py_END_ALLOW_THREADS
1616 if (bzerror == BZ_STREAM_END) {
1617 break;
1618 } else if (bzerror != BZ_FINISH_OK) {
1619 Util_CatchBZ2Error(bzerror);
1620 goto error;
1621 }
1622 if (bzs->avail_out == 0) {
1623 bufsize = Util_NewBufferSize(bufsize);
1624 if (_PyString_Resize(&ret, bufsize) < 0)
1625 goto error;
1626 bzs->next_out = BUF(ret);
1627 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1628 - totalout);
1629 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1630 }
1631 }
1632
1633 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001634 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001635
1636 RELEASE_LOCK(self);
1637 return ret;
1638
1639error:
1640 RELEASE_LOCK(self);
1641 Py_XDECREF(ret);
1642 return NULL;
1643}
1644
1645static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001646 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1647 BZ2Comp_compress__doc__},
1648 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1649 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001650 {NULL, NULL} /* sentinel */
1651};
1652
1653
1654/* ===================================================================== */
1655/* Slot definitions for BZ2Comp_Type. */
1656
1657static int
1658BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1659{
1660 int compresslevel = 9;
1661 int bzerror;
1662 static char *kwlist[] = {"compresslevel", 0};
1663
1664 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1665 kwlist, &compresslevel))
1666 return -1;
1667
1668 if (compresslevel < 1 || compresslevel > 9) {
1669 PyErr_SetString(PyExc_ValueError,
1670 "compresslevel must be between 1 and 9");
1671 goto error;
1672 }
1673
1674#ifdef WITH_THREAD
1675 self->lock = PyThread_allocate_lock();
1676 if (!self->lock)
1677 goto error;
1678#endif
1679
1680 memset(&self->bzs, 0, sizeof(bz_stream));
1681 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1682 if (bzerror != BZ_OK) {
1683 Util_CatchBZ2Error(bzerror);
1684 goto error;
1685 }
1686
1687 self->running = 1;
1688
1689 return 0;
1690error:
1691#ifdef WITH_THREAD
1692 if (self->lock)
1693 PyThread_free_lock(self->lock);
1694#endif
1695 return -1;
1696}
1697
1698static void
1699BZ2Comp_dealloc(BZ2CompObject *self)
1700{
1701#ifdef WITH_THREAD
1702 if (self->lock)
1703 PyThread_free_lock(self->lock);
1704#endif
1705 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001706 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001707}
1708
1709
1710/* ===================================================================== */
1711/* BZ2Comp_Type definition. */
1712
1713PyDoc_STRVAR(BZ2Comp__doc__,
1714"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1715\n\
1716Create a new compressor object. This object may be used to compress\n\
1717data sequentially. If you want to compress data in one shot, use the\n\
1718compress() function instead. The compresslevel parameter, if given,\n\
1719must be a number between 1 and 9.\n\
1720");
1721
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001722static PyTypeObject BZ2Comp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001723 PyObject_HEAD_INIT(NULL)
1724 0, /*ob_size*/
1725 "bz2.BZ2Compressor", /*tp_name*/
1726 sizeof(BZ2CompObject), /*tp_basicsize*/
1727 0, /*tp_itemsize*/
1728 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1729 0, /*tp_print*/
1730 0, /*tp_getattr*/
1731 0, /*tp_setattr*/
1732 0, /*tp_compare*/
1733 0, /*tp_repr*/
1734 0, /*tp_as_number*/
1735 0, /*tp_as_sequence*/
1736 0, /*tp_as_mapping*/
1737 0, /*tp_hash*/
1738 0, /*tp_call*/
1739 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001740 PyObject_GenericGetAttr,/*tp_getattro*/
1741 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001742 0, /*tp_as_buffer*/
1743 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1744 BZ2Comp__doc__, /*tp_doc*/
1745 0, /*tp_traverse*/
1746 0, /*tp_clear*/
1747 0, /*tp_richcompare*/
1748 0, /*tp_weaklistoffset*/
1749 0, /*tp_iter*/
1750 0, /*tp_iternext*/
1751 BZ2Comp_methods, /*tp_methods*/
1752 0, /*tp_members*/
1753 0, /*tp_getset*/
1754 0, /*tp_base*/
1755 0, /*tp_dict*/
1756 0, /*tp_descr_get*/
1757 0, /*tp_descr_set*/
1758 0, /*tp_dictoffset*/
1759 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001760 PyType_GenericAlloc, /*tp_alloc*/
1761 PyType_GenericNew, /*tp_new*/
1762 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001763 0, /*tp_is_gc*/
1764};
1765
1766
1767/* ===================================================================== */
1768/* Members of BZ2Decomp. */
1769
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001770#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001771#define OFF(x) offsetof(BZ2DecompObject, x)
1772
1773static PyMemberDef BZ2Decomp_members[] = {
1774 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1775 {NULL} /* Sentinel */
1776};
1777
1778
1779/* ===================================================================== */
1780/* Methods of BZ2Decomp. */
1781
1782PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1783"decompress(data) -> string\n\
1784\n\
1785Provide more data to the decompressor object. It will return chunks\n\
1786of decompressed data whenever possible. If you try to decompress data\n\
1787after the end of stream is found, EOFError will be raised. If any data\n\
1788was found after the end of stream, it'll be ignored and saved in\n\
1789unused_data attribute.\n\
1790");
1791
1792static PyObject *
1793BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1794{
1795 char *data;
1796 int datasize;
1797 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001798 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001799 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001800 bz_stream *bzs = &self->bzs;
1801 int bzerror;
1802
1803 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1804 return NULL;
1805
1806 ACQUIRE_LOCK(self);
1807 if (!self->running) {
1808 PyErr_SetString(PyExc_EOFError, "end of stream was "
1809 "already found");
1810 goto error;
1811 }
1812
1813 ret = PyString_FromStringAndSize(NULL, bufsize);
1814 if (!ret)
1815 goto error;
1816
1817 bzs->next_in = data;
1818 bzs->avail_in = datasize;
1819 bzs->next_out = BUF(ret);
1820 bzs->avail_out = bufsize;
1821
1822 totalout = BZS_TOTAL_OUT(bzs);
1823
1824 for (;;) {
1825 Py_BEGIN_ALLOW_THREADS
1826 bzerror = BZ2_bzDecompress(bzs);
1827 Py_END_ALLOW_THREADS
1828 if (bzerror == BZ_STREAM_END) {
1829 if (bzs->avail_in != 0) {
1830 Py_DECREF(self->unused_data);
1831 self->unused_data =
1832 PyString_FromStringAndSize(bzs->next_in,
1833 bzs->avail_in);
1834 }
1835 self->running = 0;
1836 break;
1837 }
1838 if (bzerror != BZ_OK) {
1839 Util_CatchBZ2Error(bzerror);
1840 goto error;
1841 }
1842 if (bzs->avail_out == 0) {
1843 bufsize = Util_NewBufferSize(bufsize);
1844 if (_PyString_Resize(&ret, bufsize) < 0) {
1845 BZ2_bzDecompressEnd(bzs);
1846 goto error;
1847 }
1848 bzs->next_out = BUF(ret);
1849 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1850 - totalout);
1851 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1852 } else if (bzs->avail_in == 0) {
1853 break;
1854 }
1855 }
1856
1857 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001858 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001859
1860 RELEASE_LOCK(self);
1861 return ret;
1862
1863error:
1864 RELEASE_LOCK(self);
1865 Py_XDECREF(ret);
1866 return NULL;
1867}
1868
1869static PyMethodDef BZ2Decomp_methods[] = {
1870 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1871 {NULL, NULL} /* sentinel */
1872};
1873
1874
1875/* ===================================================================== */
1876/* Slot definitions for BZ2Decomp_Type. */
1877
1878static int
1879BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1880{
1881 int bzerror;
1882
1883 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1884 return -1;
1885
1886#ifdef WITH_THREAD
1887 self->lock = PyThread_allocate_lock();
1888 if (!self->lock)
1889 goto error;
1890#endif
1891
1892 self->unused_data = PyString_FromString("");
1893 if (!self->unused_data)
1894 goto error;
1895
1896 memset(&self->bzs, 0, sizeof(bz_stream));
1897 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1898 if (bzerror != BZ_OK) {
1899 Util_CatchBZ2Error(bzerror);
1900 goto error;
1901 }
1902
1903 self->running = 1;
1904
1905 return 0;
1906
1907error:
1908#ifdef WITH_THREAD
1909 if (self->lock)
1910 PyThread_free_lock(self->lock);
1911#endif
1912 Py_XDECREF(self->unused_data);
1913 return -1;
1914}
1915
1916static void
1917BZ2Decomp_dealloc(BZ2DecompObject *self)
1918{
1919#ifdef WITH_THREAD
1920 if (self->lock)
1921 PyThread_free_lock(self->lock);
1922#endif
1923 Py_XDECREF(self->unused_data);
1924 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001925 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001926}
1927
1928
1929/* ===================================================================== */
1930/* BZ2Decomp_Type definition. */
1931
1932PyDoc_STRVAR(BZ2Decomp__doc__,
1933"BZ2Decompressor() -> decompressor object\n\
1934\n\
1935Create a new decompressor object. This object may be used to decompress\n\
1936data sequentially. If you want to decompress data in one shot, use the\n\
1937decompress() function instead.\n\
1938");
1939
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001940static PyTypeObject BZ2Decomp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001941 PyObject_HEAD_INIT(NULL)
1942 0, /*ob_size*/
1943 "bz2.BZ2Decompressor", /*tp_name*/
1944 sizeof(BZ2DecompObject), /*tp_basicsize*/
1945 0, /*tp_itemsize*/
1946 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1947 0, /*tp_print*/
1948 0, /*tp_getattr*/
1949 0, /*tp_setattr*/
1950 0, /*tp_compare*/
1951 0, /*tp_repr*/
1952 0, /*tp_as_number*/
1953 0, /*tp_as_sequence*/
1954 0, /*tp_as_mapping*/
1955 0, /*tp_hash*/
1956 0, /*tp_call*/
1957 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001958 PyObject_GenericGetAttr,/*tp_getattro*/
1959 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001960 0, /*tp_as_buffer*/
1961 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1962 BZ2Decomp__doc__, /*tp_doc*/
1963 0, /*tp_traverse*/
1964 0, /*tp_clear*/
1965 0, /*tp_richcompare*/
1966 0, /*tp_weaklistoffset*/
1967 0, /*tp_iter*/
1968 0, /*tp_iternext*/
1969 BZ2Decomp_methods, /*tp_methods*/
1970 BZ2Decomp_members, /*tp_members*/
1971 0, /*tp_getset*/
1972 0, /*tp_base*/
1973 0, /*tp_dict*/
1974 0, /*tp_descr_get*/
1975 0, /*tp_descr_set*/
1976 0, /*tp_dictoffset*/
1977 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001978 PyType_GenericAlloc, /*tp_alloc*/
1979 PyType_GenericNew, /*tp_new*/
1980 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001981 0, /*tp_is_gc*/
1982};
1983
1984
1985/* ===================================================================== */
1986/* Module functions. */
1987
1988PyDoc_STRVAR(bz2_compress__doc__,
1989"compress(data [, compresslevel=9]) -> string\n\
1990\n\
1991Compress data in one shot. If you want to compress data sequentially,\n\
1992use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1993given, must be a number between 1 and 9.\n\
1994");
1995
1996static PyObject *
1997bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1998{
1999 int compresslevel=9;
2000 char *data;
2001 int datasize;
2002 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002003 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002004 bz_stream _bzs;
2005 bz_stream *bzs = &_bzs;
2006 int bzerror;
2007 static char *kwlist[] = {"data", "compresslevel", 0};
2008
2009 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2010 kwlist, &data, &datasize,
2011 &compresslevel))
2012 return NULL;
2013
2014 if (compresslevel < 1 || compresslevel > 9) {
2015 PyErr_SetString(PyExc_ValueError,
2016 "compresslevel must be between 1 and 9");
2017 return NULL;
2018 }
2019
2020 /* Conforming to bz2 manual, this is large enough to fit compressed
2021 * data in one shot. We will check it later anyway. */
2022 bufsize = datasize + (datasize/100+1) + 600;
2023
2024 ret = PyString_FromStringAndSize(NULL, bufsize);
2025 if (!ret)
2026 return NULL;
2027
2028 memset(bzs, 0, sizeof(bz_stream));
2029
2030 bzs->next_in = data;
2031 bzs->avail_in = datasize;
2032 bzs->next_out = BUF(ret);
2033 bzs->avail_out = bufsize;
2034
2035 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2036 if (bzerror != BZ_OK) {
2037 Util_CatchBZ2Error(bzerror);
2038 Py_DECREF(ret);
2039 return NULL;
2040 }
Tim Peterse3228092002-11-09 04:21:44 +00002041
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002042 for (;;) {
2043 Py_BEGIN_ALLOW_THREADS
2044 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2045 Py_END_ALLOW_THREADS
2046 if (bzerror == BZ_STREAM_END) {
2047 break;
2048 } else if (bzerror != BZ_FINISH_OK) {
2049 BZ2_bzCompressEnd(bzs);
2050 Util_CatchBZ2Error(bzerror);
2051 Py_DECREF(ret);
2052 return NULL;
2053 }
2054 if (bzs->avail_out == 0) {
2055 bufsize = Util_NewBufferSize(bufsize);
2056 if (_PyString_Resize(&ret, bufsize) < 0) {
2057 BZ2_bzCompressEnd(bzs);
2058 Py_DECREF(ret);
2059 return NULL;
2060 }
2061 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2062 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2063 }
2064 }
2065
2066 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002067 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002068 BZ2_bzCompressEnd(bzs);
2069
2070 return ret;
2071}
2072
2073PyDoc_STRVAR(bz2_decompress__doc__,
2074"decompress(data) -> decompressed data\n\
2075\n\
2076Decompress data in one shot. If you want to decompress data sequentially,\n\
2077use an instance of BZ2Decompressor instead.\n\
2078");
2079
2080static PyObject *
2081bz2_decompress(PyObject *self, PyObject *args)
2082{
2083 char *data;
2084 int datasize;
2085 int bufsize = SMALLCHUNK;
2086 PyObject *ret;
2087 bz_stream _bzs;
2088 bz_stream *bzs = &_bzs;
2089 int bzerror;
2090
2091 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2092 return NULL;
2093
2094 if (datasize == 0)
2095 return PyString_FromString("");
2096
2097 ret = PyString_FromStringAndSize(NULL, bufsize);
2098 if (!ret)
2099 return NULL;
2100
2101 memset(bzs, 0, sizeof(bz_stream));
2102
2103 bzs->next_in = data;
2104 bzs->avail_in = datasize;
2105 bzs->next_out = BUF(ret);
2106 bzs->avail_out = bufsize;
2107
2108 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2109 if (bzerror != BZ_OK) {
2110 Util_CatchBZ2Error(bzerror);
2111 Py_DECREF(ret);
2112 return NULL;
2113 }
Tim Peterse3228092002-11-09 04:21:44 +00002114
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002115 for (;;) {
2116 Py_BEGIN_ALLOW_THREADS
2117 bzerror = BZ2_bzDecompress(bzs);
2118 Py_END_ALLOW_THREADS
2119 if (bzerror == BZ_STREAM_END) {
2120 break;
2121 } else if (bzerror != BZ_OK) {
2122 BZ2_bzDecompressEnd(bzs);
2123 Util_CatchBZ2Error(bzerror);
2124 Py_DECREF(ret);
2125 return NULL;
2126 }
2127 if (bzs->avail_out == 0) {
2128 bufsize = Util_NewBufferSize(bufsize);
2129 if (_PyString_Resize(&ret, bufsize) < 0) {
2130 BZ2_bzDecompressEnd(bzs);
2131 Py_DECREF(ret);
2132 return NULL;
2133 }
2134 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2135 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2136 } else if (bzs->avail_in == 0) {
2137 BZ2_bzDecompressEnd(bzs);
2138 PyErr_SetString(PyExc_ValueError,
2139 "couldn't find end of stream");
2140 Py_DECREF(ret);
2141 return NULL;
2142 }
2143 }
2144
2145 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002146 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002147 BZ2_bzDecompressEnd(bzs);
2148
2149 return ret;
2150}
2151
2152static PyMethodDef bz2_methods[] = {
2153 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2154 bz2_compress__doc__},
2155 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2156 bz2_decompress__doc__},
2157 {NULL, NULL} /* sentinel */
2158};
2159
2160/* ===================================================================== */
2161/* Initialization function. */
2162
2163PyDoc_STRVAR(bz2__doc__,
2164"The python bz2 module provides a comprehensive interface for\n\
2165the bz2 compression library. It implements a complete file\n\
2166interface, one shot (de)compression functions, and types for\n\
2167sequential (de)compression.\n\
2168");
2169
2170DL_EXPORT(void)
2171initbz2(void)
2172{
2173 PyObject *m;
2174
2175 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002176 BZ2Comp_Type.ob_type = &PyType_Type;
2177 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002178
2179 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2180
2181 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2182
2183 Py_INCREF(&BZ2File_Type);
2184 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2185
2186 Py_INCREF(&BZ2Comp_Type);
2187 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2188
2189 Py_INCREF(&BZ2Decomp_Type);
2190 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2191}