blob: 8a93cd9547bc047abde0e920da068faf7587eb50 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* Raw character buffer of a Python string object. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)


#ifdef BZ_CONFIG_ERROR

/* BZS_TOTAL_OUT(bzs): total output byte count of a bz_stream, combined
 * from its hi32/lo32 halves when a 64-bit integer type is available.
 * Every variant is a plain parenthesized expression (no trailing ';')
 * so it can be used inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* bzlib.h did not define BZ_CONFIG_ERROR: map the BZ2_-prefixed names
 * used throughout this file onto the unprefixed ones (presumably an
 * older libbzip2 API -- confirm against the bzlib.h in use). */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */


/* Per-object lock helpers; they expand to nothing without thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0   /* No newline seen, yet */
#define NEWLINE_CR      1   /* \r newline seen */
#define NEWLINE_LF      2   /* \n newline seen */
#define NEWLINE_CRLF    4   /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000081
82/* ===================================================================== */
83/* Structure definitions. */
84
85typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000086 PyObject_HEAD
87 PyObject *file;
88
89 char* f_buf; /* Allocated readahead buffer */
90 char* f_bufend; /* Points after last occupied position */
91 char* f_bufptr; /* Current buffer position */
92
93 int f_softspace; /* Flag used by 'print' command */
94
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000095 int f_univ_newline; /* Handle any newline convention */
96 int f_newlinetypes; /* Types of newlines seen */
97 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000099 BZFILE *fp;
100 int mode;
101 long pos;
102 long size;
103#ifdef WITH_THREAD
104 PyThread_type_lock lock;
105#endif
106} BZ2FileObject;
107
108typedef struct {
109 PyObject_HEAD
110 bz_stream bzs;
111 int running;
112#ifdef WITH_THREAD
113 PyThread_type_lock lock;
114#endif
115} BZ2CompObject;
116
117typedef struct {
118 PyObject_HEAD
119 bz_stream bzs;
120 int running;
121 PyObject *unused_data;
122#ifdef WITH_THREAD
123 PyThread_type_lock lock;
124#endif
125} BZ2DecompObject;
126
127/* ===================================================================== */
128/* Utility functions. */
129
130static int
131Util_CatchBZ2Error(int bzerror)
132{
133 int ret = 0;
134 switch(bzerror) {
135 case BZ_OK:
136 case BZ_STREAM_END:
137 break;
138
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000139#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000140 case BZ_CONFIG_ERROR:
141 PyErr_SetString(PyExc_SystemError,
142 "the bz2 library was not compiled "
143 "correctly");
144 ret = 1;
145 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000146#endif
Tim Peterse3228092002-11-09 04:21:44 +0000147
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000148 case BZ_PARAM_ERROR:
149 PyErr_SetString(PyExc_ValueError,
150 "the bz2 library has received wrong "
151 "parameters");
152 ret = 1;
153 break;
Tim Peterse3228092002-11-09 04:21:44 +0000154
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000155 case BZ_MEM_ERROR:
156 PyErr_NoMemory();
157 ret = 1;
158 break;
159
160 case BZ_DATA_ERROR:
161 case BZ_DATA_ERROR_MAGIC:
162 PyErr_SetString(PyExc_IOError, "invalid data stream");
163 ret = 1;
164 break;
165
166 case BZ_IO_ERROR:
167 PyErr_SetString(PyExc_IOError, "unknown IO error");
168 ret = 1;
169 break;
170
171 case BZ_UNEXPECTED_EOF:
172 PyErr_SetString(PyExc_EOFError,
173 "compressed file ended before the "
174 "logical end-of-stream was detected");
175 ret = 1;
176 break;
177
178 case BZ_SEQUENCE_ERROR:
179 PyErr_SetString(PyExc_RuntimeError,
180 "wrong sequence of bz2 library "
181 "commands used");
182 ret = 1;
183 break;
184 }
185 return ret;
186}
187
/* SMALLCHUNK: the larger of BUFSIZ and 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK: growth step once a buffer has become large. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to use after `currentsize`:
 *   - up to SMALLCHUNK:  grow by SMALLCHUNK;
 *   - up to BIGCHUNK:    double;
 *   - beyond BIGCHUNK:   grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
214
215/* This is a hacked version of Python's fileobject.c:get_line(). */
216static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000217Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000218{
219 char c;
220 char *buf, *end;
221 size_t total_v_size; /* total # of slots in buffer */
222 size_t used_v_size; /* # used slots in buffer */
223 size_t increment; /* amount to increment the buffer */
224 PyObject *v;
225 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000226 int newlinetypes = f->f_newlinetypes;
227 int skipnextlf = f->f_skipnextlf;
228 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000229
230 total_v_size = n > 0 ? n : 100;
231 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
232 if (v == NULL)
233 return NULL;
234
235 buf = BUF(v);
236 end = buf + total_v_size;
237
238 for (;;) {
239 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000240 if (univ_newline) {
241 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000242 BZ2_bzRead(&bzerror, f->fp, &c, 1);
243 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000244 if (bzerror != BZ_OK || buf == end)
245 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000246 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000247 skipnextlf = 0;
248 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000249 /* Seeing a \n here with
250 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251 * saw a \r before.
252 */
253 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000254 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000255 &c, 1);
256 if (bzerror != BZ_OK)
257 break;
258 } else {
259 newlinetypes |= NEWLINE_CR;
260 }
261 }
262 if (c == '\r') {
263 skipnextlf = 1;
264 c = '\n';
265 } else if ( c == '\n')
266 newlinetypes |= NEWLINE_LF;
267 *buf++ = c;
268 if (c == '\n') break;
269 }
270 if (bzerror == BZ_STREAM_END && skipnextlf)
271 newlinetypes |= NEWLINE_CR;
272 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000273 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000274 BZ2_bzRead(&bzerror, f->fp, &c, 1);
275 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000276 *buf++ = c;
277 } while (bzerror == BZ_OK && c != '\n' && buf != end);
278 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000279 f->f_newlinetypes = newlinetypes;
280 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000281 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000282 f->size = f->pos;
283 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 break;
285 } else if (bzerror != BZ_OK) {
286 Util_CatchBZ2Error(bzerror);
287 Py_DECREF(v);
288 return NULL;
289 }
290 if (c == '\n')
291 break;
292 /* Must be because buf == end */
293 if (n > 0)
294 break;
295 used_v_size = total_v_size;
296 increment = total_v_size >> 2; /* mild exponential growth */
297 total_v_size += increment;
298 if (total_v_size > INT_MAX) {
299 PyErr_SetString(PyExc_OverflowError,
300 "line is longer than a Python string can hold");
301 Py_DECREF(v);
302 return NULL;
303 }
304 if (_PyString_Resize(&v, total_v_size) < 0)
305 return NULL;
306 buf = BUF(v) + used_v_size;
307 end = BUF(v) + total_v_size;
308 }
309
310 used_v_size = buf - BUF(v);
311 if (used_v_size != total_v_size)
312 _PyString_Resize(&v, used_v_size);
313 return v;
314}
315
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000316/* This is a hacked version of Python's
317 * fileobject.c:Py_UniversalNewlineFread(). */
318size_t
319Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000320 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000321{
322 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000323 int newlinetypes, skipnextlf;
324
325 assert(buf != NULL);
326 assert(stream != NULL);
327
328 if (!f->f_univ_newline)
329 return BZ2_bzRead(bzerror, stream, buf, n);
330
331 newlinetypes = f->f_newlinetypes;
332 skipnextlf = f->f_skipnextlf;
333
334 /* Invariant: n is the number of bytes remaining to be filled
335 * in the buffer.
336 */
337 while (n) {
338 size_t nread;
339 int shortread;
340 char *src = dst;
341
342 nread = BZ2_bzRead(bzerror, stream, dst, n);
343 assert(nread <= n);
344 n -= nread; /* assuming 1 byte out for each in; will adjust */
345 shortread = n != 0; /* true iff EOF or error */
346 while (nread--) {
347 char c = *src++;
348 if (c == '\r') {
349 /* Save as LF and set flag to skip next LF. */
350 *dst++ = '\n';
351 skipnextlf = 1;
352 }
353 else if (skipnextlf && c == '\n') {
354 /* Skip LF, and remember we saw CR LF. */
355 skipnextlf = 0;
356 newlinetypes |= NEWLINE_CRLF;
357 ++n;
358 }
359 else {
360 /* Normal char to be stored in buffer. Also
361 * update the newlinetypes flag if either this
362 * is an LF or the previous char was a CR.
363 */
364 if (c == '\n')
365 newlinetypes |= NEWLINE_LF;
366 else if (skipnextlf)
367 newlinetypes |= NEWLINE_CR;
368 *dst++ = c;
369 skipnextlf = 0;
370 }
371 }
372 if (shortread) {
373 /* If this is EOF, update type flags. */
374 if (skipnextlf && *bzerror == BZ_STREAM_END)
375 newlinetypes |= NEWLINE_CR;
376 break;
377 }
378 }
379 f->f_newlinetypes = newlinetypes;
380 f->f_skipnextlf = skipnextlf;
381 return dst - buf;
382}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000383
384/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
385static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000386Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000387{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000388 if (f->f_buf != NULL) {
389 PyMem_Free(f->f_buf);
390 f->f_buf = NULL;
391 }
392}
393
394/* This is a hacked version of Python's fileobject.c:readahead(). */
395static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000396Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000397{
398 int chunksize;
399 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000400
401 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000402 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403 return 0;
404 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000405 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000406 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000407 if (f->mode == MODE_READ_EOF) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000408 return -1;
409 }
410 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
411 return -1;
412 }
413 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000414 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
415 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000416 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000417 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000418 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 f->size = f->pos;
420 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000421 } else if (bzerror != BZ_OK) {
422 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000424 return -1;
425 }
426 f->f_bufptr = f->f_buf;
427 f->f_bufend = f->f_buf + chunksize;
428 return 0;
429}
430
431/* This is a hacked version of Python's
432 * fileobject.c:readahead_get_line_skip(). */
433static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000434Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000436 PyStringObject* s;
437 char *bufptr;
438 char *buf;
439 int len;
440
441 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000442 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000443 return NULL;
444
445 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000446 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000447 return (PyStringObject *)
448 PyString_FromStringAndSize(NULL, skip);
449 bufptr = memchr(f->f_bufptr, '\n', len);
450 if (bufptr != NULL) {
451 bufptr++; /* Count the '\n' */
452 len = bufptr - f->f_bufptr;
453 s = (PyStringObject *)
454 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000455 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000456 return NULL;
457 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
458 f->f_bufptr = bufptr;
459 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000460 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 } else {
462 bufptr = f->f_bufptr;
463 buf = f->f_buf;
464 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000465 s = Util_ReadAheadGetLineSkip(f, skip+len,
466 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 if (s == NULL) {
468 PyMem_Free(buf);
469 return NULL;
470 }
471 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
472 PyMem_Free(buf);
473 }
474 return s;
475}
476
477/* ===================================================================== */
478/* Methods of BZ2File. */
479
480PyDoc_STRVAR(BZ2File_read__doc__,
481"read([size]) -> string\n\
482\n\
483Read at most size uncompressed bytes, returned as a string. If the size\n\
484argument is negative or omitted, read until EOF is reached.\n\
485");
486
487/* This is a hacked version of Python's fileobject.c:file_read(). */
488static PyObject *
489BZ2File_read(BZ2FileObject *self, PyObject *args)
490{
491 long bytesrequested = -1;
492 size_t bytesread, buffersize, chunksize;
493 int bzerror;
494 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000495
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000496 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
497 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000498
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000499 ACQUIRE_LOCK(self);
500 switch (self->mode) {
501 case MODE_READ:
502 break;
503 case MODE_READ_EOF:
504 ret = PyString_FromString("");
505 goto cleanup;
506 case MODE_CLOSED:
507 PyErr_SetString(PyExc_ValueError,
508 "I/O operation on closed file");
509 goto cleanup;
510 default:
511 PyErr_SetString(PyExc_IOError,
512 "file is not ready for reading");
513 goto cleanup;
514 }
515
516 if (bytesrequested < 0)
517 buffersize = Util_NewBufferSize((size_t)0);
518 else
519 buffersize = bytesrequested;
520 if (buffersize > INT_MAX) {
521 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000522 "requested number of bytes is "
523 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000524 goto cleanup;
525 }
526 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
527 if (ret == NULL)
528 goto cleanup;
529 bytesread = 0;
530
531 for (;;) {
532 Py_BEGIN_ALLOW_THREADS
533 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
534 BUF(ret)+bytesread,
535 buffersize-bytesread,
536 self);
537 self->pos += chunksize;
538 Py_END_ALLOW_THREADS
539 bytesread += chunksize;
540 if (bzerror == BZ_STREAM_END) {
541 self->size = self->pos;
542 self->mode = MODE_READ_EOF;
543 break;
544 } else if (bzerror != BZ_OK) {
545 Util_CatchBZ2Error(bzerror);
546 Py_DECREF(ret);
547 ret = NULL;
548 goto cleanup;
549 }
550 if (bytesrequested < 0) {
551 buffersize = Util_NewBufferSize(buffersize);
552 if (_PyString_Resize(&ret, buffersize) < 0)
553 goto cleanup;
554 } else {
555 break;
556 }
557 }
558 if (bytesread != buffersize)
559 _PyString_Resize(&ret, bytesread);
560
561cleanup:
562 RELEASE_LOCK(self);
563 return ret;
564}
565
566PyDoc_STRVAR(BZ2File_readline__doc__,
567"readline([size]) -> string\n\
568\n\
569Return the next line from the file, as a string, retaining newline.\n\
570A non-negative size argument will limit the maximum number of bytes to\n\
571return (an incomplete line may be returned then). Return an empty\n\
572string at EOF.\n\
573");
574
575static PyObject *
576BZ2File_readline(BZ2FileObject *self, PyObject *args)
577{
578 PyObject *ret = NULL;
579 int sizehint = -1;
580
581 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
582 return NULL;
583
584 ACQUIRE_LOCK(self);
585 switch (self->mode) {
586 case MODE_READ:
587 break;
588 case MODE_READ_EOF:
589 ret = PyString_FromString("");
590 goto cleanup;
591 case MODE_CLOSED:
592 PyErr_SetString(PyExc_ValueError,
593 "I/O operation on closed file");
594 goto cleanup;
595 default:
596 PyErr_SetString(PyExc_IOError,
597 "file is not ready for reading");
598 goto cleanup;
599 }
600
601 if (sizehint == 0)
602 ret = PyString_FromString("");
603 else
604 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
605
606cleanup:
607 RELEASE_LOCK(self);
608 return ret;
609}
610
611PyDoc_STRVAR(BZ2File_readlines__doc__,
612"readlines([size]) -> list\n\
613\n\
614Call readline() repeatedly and return a list of lines read.\n\
615The optional size argument, if given, is an approximate bound on the\n\
616total number of bytes in the lines returned.\n\
617");
618
619/* This is a hacked version of Python's fileobject.c:file_readlines(). */
620static PyObject *
621BZ2File_readlines(BZ2FileObject *self, PyObject *args)
622{
623 long sizehint = 0;
624 PyObject *list = NULL;
625 PyObject *line;
626 char small_buffer[SMALLCHUNK];
627 char *buffer = small_buffer;
628 size_t buffersize = SMALLCHUNK;
629 PyObject *big_buffer = NULL;
630 size_t nfilled = 0;
631 size_t nread;
632 size_t totalread = 0;
633 char *p, *q, *end;
634 int err;
635 int shortread = 0;
636 int bzerror;
637
638 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
639 return NULL;
640
641 ACQUIRE_LOCK(self);
642 switch (self->mode) {
643 case MODE_READ:
644 break;
645 case MODE_READ_EOF:
646 list = PyList_New(0);
647 goto cleanup;
648 case MODE_CLOSED:
649 PyErr_SetString(PyExc_ValueError,
650 "I/O operation on closed file");
651 goto cleanup;
652 default:
653 PyErr_SetString(PyExc_IOError,
654 "file is not ready for reading");
655 goto cleanup;
656 }
657
658 if ((list = PyList_New(0)) == NULL)
659 goto cleanup;
660
661 for (;;) {
662 Py_BEGIN_ALLOW_THREADS
663 nread = Util_UnivNewlineRead(&bzerror, self->fp,
664 buffer+nfilled,
665 buffersize-nfilled, self);
666 self->pos += nread;
667 Py_END_ALLOW_THREADS
668 if (bzerror == BZ_STREAM_END) {
669 self->size = self->pos;
670 self->mode = MODE_READ_EOF;
671 if (nread == 0) {
672 sizehint = 0;
673 break;
674 }
675 shortread = 1;
676 } else if (bzerror != BZ_OK) {
677 Util_CatchBZ2Error(bzerror);
678 error:
679 Py_DECREF(list);
680 list = NULL;
681 goto cleanup;
682 }
683 totalread += nread;
684 p = memchr(buffer+nfilled, '\n', nread);
685 if (p == NULL) {
686 /* Need a larger buffer to fit this line */
687 nfilled += nread;
688 buffersize *= 2;
689 if (buffersize > INT_MAX) {
690 PyErr_SetString(PyExc_OverflowError,
691 "line is longer than a Python string can hold");
692 goto error;
693 }
694 if (big_buffer == NULL) {
695 /* Create the big buffer */
696 big_buffer = PyString_FromStringAndSize(
697 NULL, buffersize);
698 if (big_buffer == NULL)
699 goto error;
700 buffer = PyString_AS_STRING(big_buffer);
701 memcpy(buffer, small_buffer, nfilled);
702 }
703 else {
704 /* Grow the big buffer */
705 _PyString_Resize(&big_buffer, buffersize);
706 buffer = PyString_AS_STRING(big_buffer);
707 }
708 continue;
709 }
710 end = buffer+nfilled+nread;
711 q = buffer;
712 do {
713 /* Process complete lines */
714 p++;
715 line = PyString_FromStringAndSize(q, p-q);
716 if (line == NULL)
717 goto error;
718 err = PyList_Append(list, line);
719 Py_DECREF(line);
720 if (err != 0)
721 goto error;
722 q = p;
723 p = memchr(q, '\n', end-q);
724 } while (p != NULL);
725 /* Move the remaining incomplete line to the start */
726 nfilled = end-q;
727 memmove(buffer, q, nfilled);
728 if (sizehint > 0)
729 if (totalread >= (size_t)sizehint)
730 break;
731 if (shortread) {
732 sizehint = 0;
733 break;
734 }
735 }
736 if (nfilled != 0) {
737 /* Partial last line */
738 line = PyString_FromStringAndSize(buffer, nfilled);
739 if (line == NULL)
740 goto error;
741 if (sizehint > 0) {
742 /* Need to complete the last line */
743 PyObject *rest = Util_GetLine(self, 0);
744 if (rest == NULL) {
745 Py_DECREF(line);
746 goto error;
747 }
748 PyString_Concat(&line, rest);
749 Py_DECREF(rest);
750 if (line == NULL)
751 goto error;
752 }
753 err = PyList_Append(list, line);
754 Py_DECREF(line);
755 if (err != 0)
756 goto error;
757 }
758
759 cleanup:
760 RELEASE_LOCK(self);
761 if (big_buffer) {
762 Py_DECREF(big_buffer);
763 }
764 return list;
765}
766
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000767PyDoc_STRVAR(BZ2File_xreadlines__doc__,
768"xreadlines() -> self\n\
769\n\
770For backward compatibility. BZ2File objects now include the performance\n\
771optimizations previously implemented in the xreadlines module.\n\
772");
773
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000774PyDoc_STRVAR(BZ2File_write__doc__,
775"write(data) -> None\n\
776\n\
777Write the 'data' string to file. Note that due to buffering, close() may\n\
778be needed before the file on disk reflects the data written.\n\
779");
780
781/* This is a hacked version of Python's fileobject.c:file_write(). */
782static PyObject *
783BZ2File_write(BZ2FileObject *self, PyObject *args)
784{
785 PyObject *ret = NULL;
786 char *buf;
787 int len;
788 int bzerror;
789
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000790 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000791 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000792
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000793 ACQUIRE_LOCK(self);
794 switch (self->mode) {
795 case MODE_WRITE:
796 break;
Tim Peterse3228092002-11-09 04:21:44 +0000797
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000798 case MODE_CLOSED:
799 PyErr_SetString(PyExc_ValueError,
800 "I/O operation on closed file");
801 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000802
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000803 default:
804 PyErr_SetString(PyExc_IOError,
805 "file is not ready for writing");
806 goto cleanup;;
807 }
808
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000809 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810
811 Py_BEGIN_ALLOW_THREADS
812 BZ2_bzWrite (&bzerror, self->fp, buf, len);
813 self->pos += len;
814 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000815
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816 if (bzerror != BZ_OK) {
817 Util_CatchBZ2Error(bzerror);
818 goto cleanup;
819 }
Tim Peterse3228092002-11-09 04:21:44 +0000820
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000821 Py_INCREF(Py_None);
822 ret = Py_None;
823
824cleanup:
825 RELEASE_LOCK(self);
826 return ret;
827}
828
829PyDoc_STRVAR(BZ2File_writelines__doc__,
830"writelines(sequence_of_strings) -> None\n\
831\n\
832Write the sequence of strings to the file. Note that newlines are not\n\
833added. The sequence can be any iterable object producing strings. This is\n\
834equivalent to calling write() for each string.\n\
835");
836
837/* This is a hacked version of Python's fileobject.c:file_writelines(). */
838static PyObject *
839BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
840{
841#define CHUNKSIZE 1000
842 PyObject *list = NULL;
843 PyObject *iter = NULL;
844 PyObject *ret = NULL;
845 PyObject *line;
846 int i, j, index, len, islist;
847 int bzerror;
848
849 ACQUIRE_LOCK(self);
850 islist = PyList_Check(seq);
851 if (!islist) {
852 iter = PyObject_GetIter(seq);
853 if (iter == NULL) {
854 PyErr_SetString(PyExc_TypeError,
855 "writelines() requires an iterable argument");
856 goto error;
857 }
858 list = PyList_New(CHUNKSIZE);
859 if (list == NULL)
860 goto error;
861 }
862
863 /* Strategy: slurp CHUNKSIZE lines into a private list,
864 checking that they are all strings, then write that list
865 without holding the interpreter lock, then come back for more. */
866 for (index = 0; ; index += CHUNKSIZE) {
867 if (islist) {
868 Py_XDECREF(list);
869 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
870 if (list == NULL)
871 goto error;
872 j = PyList_GET_SIZE(list);
873 }
874 else {
875 for (j = 0; j < CHUNKSIZE; j++) {
876 line = PyIter_Next(iter);
877 if (line == NULL) {
878 if (PyErr_Occurred())
879 goto error;
880 break;
881 }
882 PyList_SetItem(list, j, line);
883 }
884 }
885 if (j == 0)
886 break;
887
888 /* Check that all entries are indeed strings. If not,
889 apply the same rules as for file.write() and
890 convert the rets to strings. This is slow, but
891 seems to be the only way since all conversion APIs
892 could potentially execute Python code. */
893 for (i = 0; i < j; i++) {
894 PyObject *v = PyList_GET_ITEM(list, i);
895 if (!PyString_Check(v)) {
896 const char *buffer;
897 int len;
898 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
899 PyErr_SetString(PyExc_TypeError,
900 "writelines() "
901 "argument must be "
902 "a sequence of "
903 "strings");
904 goto error;
905 }
906 line = PyString_FromStringAndSize(buffer,
907 len);
908 if (line == NULL)
909 goto error;
910 Py_DECREF(v);
911 PyList_SET_ITEM(list, i, line);
912 }
913 }
914
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000915 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000916
917 /* Since we are releasing the global lock, the
918 following code may *not* execute Python code. */
919 Py_BEGIN_ALLOW_THREADS
920 for (i = 0; i < j; i++) {
921 line = PyList_GET_ITEM(list, i);
922 len = PyString_GET_SIZE(line);
923 BZ2_bzWrite (&bzerror, self->fp,
924 PyString_AS_STRING(line), len);
925 if (bzerror != BZ_OK) {
926 Py_BLOCK_THREADS
927 Util_CatchBZ2Error(bzerror);
928 goto error;
929 }
930 }
931 Py_END_ALLOW_THREADS
932
933 if (j < CHUNKSIZE)
934 break;
935 }
936
937 Py_INCREF(Py_None);
938 ret = Py_None;
939
940 error:
941 RELEASE_LOCK(self);
942 Py_XDECREF(list);
943 Py_XDECREF(iter);
944 return ret;
945#undef CHUNKSIZE
946}
947
948PyDoc_STRVAR(BZ2File_seek__doc__,
949"seek(offset [, whence]) -> None\n\
950\n\
951Move to new file position. Argument offset is a byte count. Optional\n\
952argument whence defaults to 0 (offset from start of file, offset\n\
953should be >= 0); other values are 1 (move relative to current position,\n\
954positive or negative), and 2 (move relative to end of file, usually\n\
955negative, although many platforms allow seeking beyond the end of a file).\n\
956\n\
957Note that seeking of bz2 files is emulated, and depending on the parameters\n\
958the operation may be extremely slow.\n\
959");
960
961static PyObject *
962BZ2File_seek(BZ2FileObject *self, PyObject *args)
963{
964 int where = 0;
965 long offset;
966 char small_buffer[SMALLCHUNK];
967 char *buffer = small_buffer;
968 size_t buffersize = SMALLCHUNK;
969 int bytesread = 0;
970 int readsize;
971 int chunksize;
972 int bzerror;
973 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000974 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000975
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000976 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
977 return NULL;
978
979 ACQUIRE_LOCK(self);
980 Util_DropReadAhead(self);
981 switch (self->mode) {
982 case MODE_READ:
983 case MODE_READ_EOF:
984 break;
Tim Peterse3228092002-11-09 04:21:44 +0000985
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986 case MODE_CLOSED:
987 PyErr_SetString(PyExc_ValueError,
988 "I/O operation on closed file");
989 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000990
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000991 default:
992 PyErr_SetString(PyExc_IOError,
993 "seek works only while reading");
994 goto cleanup;;
995 }
996
997 if (offset < 0) {
998 if (where == 1) {
999 offset = self->pos + offset;
1000 rewind = 1;
1001 } else if (where == 2) {
1002 if (self->size == -1) {
1003 assert(self->mode != MODE_READ_EOF);
1004 for (;;) {
1005 Py_BEGIN_ALLOW_THREADS
1006 chunksize = Util_UnivNewlineRead(
1007 &bzerror, self->fp,
1008 buffer, buffersize,
1009 self);
1010 self->pos += chunksize;
1011 Py_END_ALLOW_THREADS
1012
1013 bytesread += chunksize;
1014 if (bzerror == BZ_STREAM_END) {
1015 break;
1016 } else if (bzerror != BZ_OK) {
1017 Util_CatchBZ2Error(bzerror);
1018 goto cleanup;
1019 }
1020 }
1021 self->mode = MODE_READ_EOF;
1022 self->size = self->pos;
1023 bytesread = 0;
1024 }
1025 offset = self->size + offset;
1026 if (offset >= self->pos)
1027 offset -= self->pos;
1028 else
1029 rewind = 1;
1030 }
1031 if (offset < 0)
1032 offset = 0;
1033 } else if (where == 0) {
1034 if (offset >= self->pos)
1035 offset -= self->pos;
1036 else
1037 rewind = 1;
1038 }
1039
1040 if (rewind) {
1041 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001042 if (bzerror != BZ_OK) {
1043 Util_CatchBZ2Error(bzerror);
1044 goto cleanup;
1045 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001046 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001047 if (!ret)
1048 goto cleanup;
1049 Py_DECREF(ret);
1050 ret = NULL;
1051 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001052 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001053 0, 0, NULL, 0);
1054 if (bzerror != BZ_OK) {
1055 Util_CatchBZ2Error(bzerror);
1056 goto cleanup;
1057 }
1058 self->mode = MODE_READ;
1059 } else if (self->mode == MODE_READ_EOF) {
1060 goto exit;
1061 }
1062
1063 if (offset == 0)
1064 goto exit;
1065
1066 /* Before getting here, offset must be set to the number of bytes
1067 * to walk forward. */
1068 for (;;) {
Tim Petersa17c0c42002-11-09 04:23:31 +00001069 if ((size_t)offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 readsize = buffersize;
1071 else
1072 readsize = offset-bytesread;
1073 Py_BEGIN_ALLOW_THREADS
1074 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1075 buffer, readsize, self);
1076 self->pos += chunksize;
1077 Py_END_ALLOW_THREADS
1078 bytesread += chunksize;
1079 if (bzerror == BZ_STREAM_END) {
1080 self->size = self->pos;
1081 self->mode = MODE_READ_EOF;
1082 break;
1083 } else if (bzerror != BZ_OK) {
1084 Util_CatchBZ2Error(bzerror);
1085 goto cleanup;
1086 }
1087 if (bytesread == offset)
1088 break;
1089 }
1090
1091exit:
1092 Py_INCREF(Py_None);
1093 ret = Py_None;
1094
1095cleanup:
1096 RELEASE_LOCK(self);
1097 return ret;
1098}
1099
1100PyDoc_STRVAR(BZ2File_tell__doc__,
1101"tell() -> int\n\
1102\n\
1103Return the current file position, an integer (may be a long integer).\n\
1104");
1105
1106static PyObject *
1107BZ2File_tell(BZ2FileObject *self, PyObject *args)
1108{
1109 PyObject *ret = NULL;
1110
1111 if (self->mode == MODE_CLOSED) {
1112 PyErr_SetString(PyExc_ValueError,
1113 "I/O operation on closed file");
1114 goto cleanup;
1115 }
1116
1117 ret = PyInt_FromLong(self->pos);
1118
1119cleanup:
1120 return ret;
1121}
1122
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001123PyDoc_STRVAR(BZ2File_close__doc__,
1124"close() -> None or (perhaps) an integer\n\
1125\n\
1126Close the file. Sets data attribute .closed to true. A closed file\n\
1127cannot be used for further I/O operations. close() may be called more\n\
1128than once without error.\n\
1129");
1130
1131static PyObject *
1132BZ2File_close(BZ2FileObject *self)
1133{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001134 PyObject *ret = NULL;
1135 int bzerror = BZ_OK;
1136
1137 ACQUIRE_LOCK(self);
1138 switch (self->mode) {
1139 case MODE_READ:
1140 case MODE_READ_EOF:
1141 BZ2_bzReadClose(&bzerror, self->fp);
1142 break;
1143 case MODE_WRITE:
1144 BZ2_bzWriteClose(&bzerror, self->fp,
1145 0, NULL, NULL);
1146 break;
1147 }
1148 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001149 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001150 if (bzerror != BZ_OK) {
1151 Util_CatchBZ2Error(bzerror);
1152 Py_XDECREF(ret);
1153 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154 }
1155
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001156 RELEASE_LOCK(self);
1157 return ret;
1158}
1159
/* Forward declaration: BZ2File_getiter is defined further down but is
 * referenced by the "xreadlines" entry of the method table below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table for BZ2File objects (installed as tp_methods).
 *
 * NOTE(review): "xreadlines" is registered as METH_VARARGS although
 * BZ2File_getiter only declares a self parameter, so any positional
 * arguments appear to be accepted and ignored -- confirm whether
 * METH_NOARGS was intended. */
static PyMethodDef BZ2File_methods[] = {
    {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
    {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
    {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
    {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
    {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
    {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
    {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
    {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
    {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
    {NULL, NULL} /* sentinel */
};
1174
1175
1176/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001177/* Getters and setters of BZ2File. */
1178
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001179/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1180static PyObject *
1181BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1182{
1183 switch (self->f_newlinetypes) {
1184 case NEWLINE_UNKNOWN:
1185 Py_INCREF(Py_None);
1186 return Py_None;
1187 case NEWLINE_CR:
1188 return PyString_FromString("\r");
1189 case NEWLINE_LF:
1190 return PyString_FromString("\n");
1191 case NEWLINE_CR|NEWLINE_LF:
1192 return Py_BuildValue("(ss)", "\r", "\n");
1193 case NEWLINE_CRLF:
1194 return PyString_FromString("\r\n");
1195 case NEWLINE_CR|NEWLINE_CRLF:
1196 return Py_BuildValue("(ss)", "\r", "\r\n");
1197 case NEWLINE_LF|NEWLINE_CRLF:
1198 return Py_BuildValue("(ss)", "\n", "\r\n");
1199 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1200 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1201 default:
1202 PyErr_Format(PyExc_SystemError,
1203 "Unknown newlines value 0x%x\n",
1204 self->f_newlinetypes);
1205 return NULL;
1206 }
1207}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001208
1209static PyObject *
1210BZ2File_get_closed(BZ2FileObject *self, void *closure)
1211{
1212 return PyInt_FromLong(self->mode == MODE_CLOSED);
1213}
1214
1215static PyObject *
1216BZ2File_get_mode(BZ2FileObject *self, void *closure)
1217{
1218 return PyObject_GetAttrString(self->file, "mode");
1219}
1220
1221static PyObject *
1222BZ2File_get_name(BZ2FileObject *self, void *closure)
1223{
1224 return PyObject_GetAttrString(self->file, "name");
1225}
1226
/* Computed attributes of BZ2File (installed as tp_getset).  Every entry
 * has a getter only, i.e. no setter is provided for any of them. */
static PyGetSetDef BZ2File_getset[] = {
    {"closed", (getter)BZ2File_get_closed, NULL,
            "True if the file is closed"},
    {"newlines", (getter)BZ2File_get_newlines, NULL,
            "end-of-line convention used in this file"},
    {"mode", (getter)BZ2File_get_mode, NULL,
            "file mode ('r', 'w', or 'U')"},
    {"name", (getter)BZ2File_get_name, NULL,
            "file name"},
    {NULL} /* Sentinel */
};
1238
1239
1240/* ===================================================================== */
1241/* Members of BZ2File_Type. */
1242
/* OFF(x) yields the byte offset of field x inside BZ2FileObject; it is
 * redefined here so the member table below refers to this struct. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Plain struct members of BZ2File exposed to Python (tp_members). */
static PyMemberDef BZ2File_members[] = {
    {"softspace", T_INT, OFF(f_softspace), 0,
     "flag indicating that a space needs to be printed; used by print"},
    {NULL} /* Sentinel */
};
1251
1252/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001253/* Slot definitions for BZ2File_Type. */
1254
1255static int
1256BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1257{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001258 static char *kwlist[] = {"filename", "mode", "buffering",
1259 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001260 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001261 char *mode = "r";
1262 int buffering = -1;
1263 int compresslevel = 9;
1264 int bzerror;
1265 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001266
1267 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001268
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001269 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1270 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001271 &compresslevel))
1272 return -1;
1273
1274 if (compresslevel < 1 || compresslevel > 9) {
1275 PyErr_SetString(PyExc_ValueError,
1276 "compresslevel must be between 1 and 9");
1277 return -1;
1278 }
1279
1280 for (;;) {
1281 int error = 0;
1282 switch (*mode) {
1283 case 'r':
1284 case 'w':
1285 if (mode_char)
1286 error = 1;
1287 mode_char = *mode;
1288 break;
1289
1290 case 'b':
1291 break;
1292
1293 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001294 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001295 break;
1296
1297 default:
1298 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001299 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001300 }
1301 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001302 PyErr_Format(PyExc_ValueError,
1303 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001304 return -1;
1305 }
1306 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001307 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001308 break;
1309 }
1310
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001311 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001312
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001313 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1314 name, mode, buffering);
1315 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001316 return -1;
1317
1318 /* From now on, we have stuff to dealloc, so jump to error label
1319 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001320
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001321#ifdef WITH_THREAD
1322 self->lock = PyThread_allocate_lock();
1323 if (!self->lock)
1324 goto error;
1325#endif
1326
1327 if (mode_char == 'r')
1328 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001329 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001330 0, 0, NULL, 0);
1331 else
1332 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001333 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001334 compresslevel, 0, 0);
1335
1336 if (bzerror != BZ_OK) {
1337 Util_CatchBZ2Error(bzerror);
1338 goto error;
1339 }
1340
1341 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1342
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001343 return 0;
1344
1345error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001346 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001347#ifdef WITH_THREAD
1348 if (self->lock)
1349 PyThread_free_lock(self->lock);
1350#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001351 return -1;
1352}
1353
1354static void
1355BZ2File_dealloc(BZ2FileObject *self)
1356{
1357 int bzerror;
1358#ifdef WITH_THREAD
1359 if (self->lock)
1360 PyThread_free_lock(self->lock);
1361#endif
1362 switch (self->mode) {
1363 case MODE_READ:
1364 case MODE_READ_EOF:
1365 BZ2_bzReadClose(&bzerror, self->fp);
1366 break;
1367 case MODE_WRITE:
1368 BZ2_bzWriteClose(&bzerror, self->fp,
1369 0, NULL, NULL);
1370 break;
1371 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001372 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001373 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001374 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001375}
1376
1377/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1378static PyObject *
1379BZ2File_getiter(BZ2FileObject *self)
1380{
1381 if (self->mode == MODE_CLOSED) {
1382 PyErr_SetString(PyExc_ValueError,
1383 "I/O operation on closed file");
1384 return NULL;
1385 }
1386 Py_INCREF((PyObject*)self);
1387 return (PyObject *)self;
1388}
1389
1390/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1391#define READAHEAD_BUFSIZE 8192
1392static PyObject *
1393BZ2File_iternext(BZ2FileObject *self)
1394{
1395 PyStringObject* ret;
1396 ACQUIRE_LOCK(self);
1397 if (self->mode == MODE_CLOSED) {
1398 PyErr_SetString(PyExc_ValueError,
1399 "I/O operation on closed file");
1400 return NULL;
1401 }
1402 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1403 RELEASE_LOCK(self);
1404 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1405 Py_XDECREF(ret);
1406 return NULL;
1407 }
1408 return (PyObject *)ret;
1409}
1410
1411/* ===================================================================== */
1412/* BZ2File_Type definition. */
1413
/* Docstring of the BZ2File type, assembled from two PyDoc_STR pieces.
 *
 * NOTE(review): the signature below advertises buffering=0, while
 * BZ2File_init initialises its buffering argument to -1 -- confirm which
 * default should be documented. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;

/* Type object for bz2.BZ2File.  It wires up the dealloc, iterator, init
 * and allocation slots together with the method, member and getset
 * tables defined above; the type may be subclassed (BASETYPE flag). */
static PyTypeObject BZ2File_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2File",          /*tp_name*/
    sizeof(BZ2FileObject),  /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2File_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2File__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    (getiterfunc)BZ2File_getiter, /*tp_iter*/
    (iternextfunc)BZ2File_iternext, /*tp_iternext*/
    BZ2File_methods,        /*tp_methods*/
    BZ2File_members,        /*tp_members*/
    BZ2File_getset,         /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2File_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1478
1479
1480/* ===================================================================== */
1481/* Methods of BZ2Comp. */
1482
1483PyDoc_STRVAR(BZ2Comp_compress__doc__,
1484"compress(data) -> string\n\
1485\n\
1486Provide more data to the compressor object. It will return chunks of\n\
1487compressed data whenever possible. When you've finished providing data\n\
1488to compress, call the flush() method to finish the compression process,\n\
1489and return what is left in the internal buffers.\n\
1490");
1491
1492static PyObject *
1493BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1494{
1495 char *data;
1496 int datasize;
1497 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001498 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001499 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001500 bz_stream *bzs = &self->bzs;
1501 int bzerror;
1502
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001503 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001504 return NULL;
1505
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001506 if (datasize == 0)
1507 return PyString_FromString("");
1508
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001509 ACQUIRE_LOCK(self);
1510 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001511 PyErr_SetString(PyExc_ValueError,
1512 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001513 goto error;
1514 }
1515
1516 ret = PyString_FromStringAndSize(NULL, bufsize);
1517 if (!ret)
1518 goto error;
1519
1520 bzs->next_in = data;
1521 bzs->avail_in = datasize;
1522 bzs->next_out = BUF(ret);
1523 bzs->avail_out = bufsize;
1524
1525 totalout = BZS_TOTAL_OUT(bzs);
1526
1527 for (;;) {
1528 Py_BEGIN_ALLOW_THREADS
1529 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1530 Py_END_ALLOW_THREADS
1531 if (bzerror != BZ_RUN_OK) {
1532 Util_CatchBZ2Error(bzerror);
1533 goto error;
1534 }
1535 if (bzs->avail_out == 0) {
1536 bufsize = Util_NewBufferSize(bufsize);
1537 if (_PyString_Resize(&ret, bufsize) < 0) {
1538 BZ2_bzCompressEnd(bzs);
1539 goto error;
1540 }
1541 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1542 - totalout);
1543 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1544 } else if (bzs->avail_in == 0) {
1545 break;
1546 }
1547 }
1548
Tim Petersf29f0c62002-11-09 04:28:17 +00001549 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001550
1551 RELEASE_LOCK(self);
1552 return ret;
1553
1554error:
1555 RELEASE_LOCK(self);
1556 Py_XDECREF(ret);
1557 return NULL;
1558}
1559
1560PyDoc_STRVAR(BZ2Comp_flush__doc__,
1561"flush() -> string\n\
1562\n\
1563Finish the compression process and return what is left in internal buffers.\n\
1564You must not use the compressor object after calling this method.\n\
1565");
1566
1567static PyObject *
1568BZ2Comp_flush(BZ2CompObject *self)
1569{
1570 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001571 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001572 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001573 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001574 int bzerror;
1575
1576 ACQUIRE_LOCK(self);
1577 if (!self->running) {
1578 PyErr_SetString(PyExc_ValueError, "object was already "
1579 "flushed");
1580 goto error;
1581 }
1582 self->running = 0;
1583
1584 ret = PyString_FromStringAndSize(NULL, bufsize);
1585 if (!ret)
1586 goto error;
1587
1588 bzs->next_out = BUF(ret);
1589 bzs->avail_out = bufsize;
1590
1591 totalout = BZS_TOTAL_OUT(bzs);
1592
1593 for (;;) {
1594 Py_BEGIN_ALLOW_THREADS
1595 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1596 Py_END_ALLOW_THREADS
1597 if (bzerror == BZ_STREAM_END) {
1598 break;
1599 } else if (bzerror != BZ_FINISH_OK) {
1600 Util_CatchBZ2Error(bzerror);
1601 goto error;
1602 }
1603 if (bzs->avail_out == 0) {
1604 bufsize = Util_NewBufferSize(bufsize);
1605 if (_PyString_Resize(&ret, bufsize) < 0)
1606 goto error;
1607 bzs->next_out = BUF(ret);
1608 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1609 - totalout);
1610 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1611 }
1612 }
1613
1614 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001615 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001616
1617 RELEASE_LOCK(self);
1618 return ret;
1619
1620error:
1621 RELEASE_LOCK(self);
1622 Py_XDECREF(ret);
1623 return NULL;
1624}
1625
/* Method table for BZ2Compressor objects (installed as tp_methods):
 * compress() takes positional arguments, flush() takes none. */
static PyMethodDef BZ2Comp_methods[] = {
    {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
     BZ2Comp_compress__doc__},
    {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
     BZ2Comp_flush__doc__},
    {NULL, NULL} /* sentinel */
};
1633
1634
1635/* ===================================================================== */
1636/* Slot definitions for BZ2Comp_Type. */
1637
1638static int
1639BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1640{
1641 int compresslevel = 9;
1642 int bzerror;
1643 static char *kwlist[] = {"compresslevel", 0};
1644
1645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1646 kwlist, &compresslevel))
1647 return -1;
1648
1649 if (compresslevel < 1 || compresslevel > 9) {
1650 PyErr_SetString(PyExc_ValueError,
1651 "compresslevel must be between 1 and 9");
1652 goto error;
1653 }
1654
1655#ifdef WITH_THREAD
1656 self->lock = PyThread_allocate_lock();
1657 if (!self->lock)
1658 goto error;
1659#endif
1660
1661 memset(&self->bzs, 0, sizeof(bz_stream));
1662 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1663 if (bzerror != BZ_OK) {
1664 Util_CatchBZ2Error(bzerror);
1665 goto error;
1666 }
1667
1668 self->running = 1;
1669
1670 return 0;
1671error:
1672#ifdef WITH_THREAD
1673 if (self->lock)
1674 PyThread_free_lock(self->lock);
1675#endif
1676 return -1;
1677}
1678
1679static void
1680BZ2Comp_dealloc(BZ2CompObject *self)
1681{
1682#ifdef WITH_THREAD
1683 if (self->lock)
1684 PyThread_free_lock(self->lock);
1685#endif
1686 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001687 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001688}
1689
1690
1691/* ===================================================================== */
1692/* BZ2Comp_Type definition. */
1693
/* Docstring of the BZ2Compressor type. */
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for bz2.BZ2Compressor.  Only the dealloc, init, alloc/new
 * and free slots plus the method table are filled in; the type may be
 * subclassed (BASETYPE flag). */
static PyTypeObject BZ2Comp_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2Compressor",    /*tp_name*/
    sizeof(BZ2CompObject),  /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Comp__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Comp_methods,        /*tp_methods*/
    0,                      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Comp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1746
1747
1748/* ===================================================================== */
1749/* Members of BZ2Decomp. */
1750
/* Re-point OFF(x) at BZ2DecompObject for the member table below. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Plain struct members of BZ2Decompressor exposed to Python
 * (tp_members); unused_data is flagged RO. */
static PyMemberDef BZ2Decomp_members[] = {
    {"unused_data", T_OBJECT, OFF(unused_data), RO},
    {NULL} /* Sentinel */
};
1758
1759
1760/* ===================================================================== */
1761/* Methods of BZ2Decomp. */
1762
1763PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1764"decompress(data) -> string\n\
1765\n\
1766Provide more data to the decompressor object. It will return chunks\n\
1767of decompressed data whenever possible. If you try to decompress data\n\
1768after the end of stream is found, EOFError will be raised. If any data\n\
1769was found after the end of stream, it'll be ignored and saved in\n\
1770unused_data attribute.\n\
1771");
1772
1773static PyObject *
1774BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1775{
1776 char *data;
1777 int datasize;
1778 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001779 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001780 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001781 bz_stream *bzs = &self->bzs;
1782 int bzerror;
1783
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001784 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001785 return NULL;
1786
1787 ACQUIRE_LOCK(self);
1788 if (!self->running) {
1789 PyErr_SetString(PyExc_EOFError, "end of stream was "
1790 "already found");
1791 goto error;
1792 }
1793
1794 ret = PyString_FromStringAndSize(NULL, bufsize);
1795 if (!ret)
1796 goto error;
1797
1798 bzs->next_in = data;
1799 bzs->avail_in = datasize;
1800 bzs->next_out = BUF(ret);
1801 bzs->avail_out = bufsize;
1802
1803 totalout = BZS_TOTAL_OUT(bzs);
1804
1805 for (;;) {
1806 Py_BEGIN_ALLOW_THREADS
1807 bzerror = BZ2_bzDecompress(bzs);
1808 Py_END_ALLOW_THREADS
1809 if (bzerror == BZ_STREAM_END) {
1810 if (bzs->avail_in != 0) {
1811 Py_DECREF(self->unused_data);
1812 self->unused_data =
1813 PyString_FromStringAndSize(bzs->next_in,
1814 bzs->avail_in);
1815 }
1816 self->running = 0;
1817 break;
1818 }
1819 if (bzerror != BZ_OK) {
1820 Util_CatchBZ2Error(bzerror);
1821 goto error;
1822 }
1823 if (bzs->avail_out == 0) {
1824 bufsize = Util_NewBufferSize(bufsize);
1825 if (_PyString_Resize(&ret, bufsize) < 0) {
1826 BZ2_bzDecompressEnd(bzs);
1827 goto error;
1828 }
1829 bzs->next_out = BUF(ret);
1830 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1831 - totalout);
1832 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1833 } else if (bzs->avail_in == 0) {
1834 break;
1835 }
1836 }
1837
1838 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001839 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001840
1841 RELEASE_LOCK(self);
1842 return ret;
1843
1844error:
1845 RELEASE_LOCK(self);
1846 Py_XDECREF(ret);
1847 return NULL;
1848}
1849
/* Method table for BZ2Decompressor objects (installed as tp_methods). */
static PyMethodDef BZ2Decomp_methods[] = {
    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
    {NULL, NULL} /* sentinel */
};
1854
1855
1856/* ===================================================================== */
1857/* Slot definitions for BZ2Decomp_Type. */
1858
1859static int
1860BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1861{
1862 int bzerror;
1863
1864 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1865 return -1;
1866
1867#ifdef WITH_THREAD
1868 self->lock = PyThread_allocate_lock();
1869 if (!self->lock)
1870 goto error;
1871#endif
1872
1873 self->unused_data = PyString_FromString("");
1874 if (!self->unused_data)
1875 goto error;
1876
1877 memset(&self->bzs, 0, sizeof(bz_stream));
1878 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1879 if (bzerror != BZ_OK) {
1880 Util_CatchBZ2Error(bzerror);
1881 goto error;
1882 }
1883
1884 self->running = 1;
1885
1886 return 0;
1887
1888error:
1889#ifdef WITH_THREAD
1890 if (self->lock)
1891 PyThread_free_lock(self->lock);
1892#endif
1893 Py_XDECREF(self->unused_data);
1894 return -1;
1895}
1896
1897static void
1898BZ2Decomp_dealloc(BZ2DecompObject *self)
1899{
1900#ifdef WITH_THREAD
1901 if (self->lock)
1902 PyThread_free_lock(self->lock);
1903#endif
1904 Py_XDECREF(self->unused_data);
1905 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001906 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001907}
1908
1909
1910/* ===================================================================== */
1911/* BZ2Decomp_Type definition. */
1912
/* Docstring of the BZ2Decompressor type. */
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.  Fills in the dealloc, init,
 * alloc/new and free slots plus the method and member tables; the type
 * may be subclassed (BASETYPE flag). */
static PyTypeObject BZ2Decomp_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2Decompressor",  /*tp_name*/
    sizeof(BZ2DecompObject), /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Decomp__doc__,       /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Decomp_methods,      /*tp_methods*/
    BZ2Decomp_members,      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Decomp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1964
1965
1966/* ===================================================================== */
1967/* Module functions. */
1968
1969PyDoc_STRVAR(bz2_compress__doc__,
1970"compress(data [, compresslevel=9]) -> string\n\
1971\n\
1972Compress data in one shot. If you want to compress data sequentially,\n\
1973use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1974given, must be a number between 1 and 9.\n\
1975");
1976
1977static PyObject *
1978bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1979{
1980 int compresslevel=9;
1981 char *data;
1982 int datasize;
1983 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001984 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001985 bz_stream _bzs;
1986 bz_stream *bzs = &_bzs;
1987 int bzerror;
1988 static char *kwlist[] = {"data", "compresslevel", 0};
1989
1990 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1991 kwlist, &data, &datasize,
1992 &compresslevel))
1993 return NULL;
1994
1995 if (compresslevel < 1 || compresslevel > 9) {
1996 PyErr_SetString(PyExc_ValueError,
1997 "compresslevel must be between 1 and 9");
1998 return NULL;
1999 }
2000
2001 /* Conforming to bz2 manual, this is large enough to fit compressed
2002 * data in one shot. We will check it later anyway. */
2003 bufsize = datasize + (datasize/100+1) + 600;
2004
2005 ret = PyString_FromStringAndSize(NULL, bufsize);
2006 if (!ret)
2007 return NULL;
2008
2009 memset(bzs, 0, sizeof(bz_stream));
2010
2011 bzs->next_in = data;
2012 bzs->avail_in = datasize;
2013 bzs->next_out = BUF(ret);
2014 bzs->avail_out = bufsize;
2015
2016 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2017 if (bzerror != BZ_OK) {
2018 Util_CatchBZ2Error(bzerror);
2019 Py_DECREF(ret);
2020 return NULL;
2021 }
Tim Peterse3228092002-11-09 04:21:44 +00002022
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002023 for (;;) {
2024 Py_BEGIN_ALLOW_THREADS
2025 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2026 Py_END_ALLOW_THREADS
2027 if (bzerror == BZ_STREAM_END) {
2028 break;
2029 } else if (bzerror != BZ_FINISH_OK) {
2030 BZ2_bzCompressEnd(bzs);
2031 Util_CatchBZ2Error(bzerror);
2032 Py_DECREF(ret);
2033 return NULL;
2034 }
2035 if (bzs->avail_out == 0) {
2036 bufsize = Util_NewBufferSize(bufsize);
2037 if (_PyString_Resize(&ret, bufsize) < 0) {
2038 BZ2_bzCompressEnd(bzs);
2039 Py_DECREF(ret);
2040 return NULL;
2041 }
2042 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2043 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2044 }
2045 }
2046
2047 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002048 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002049 BZ2_bzCompressEnd(bzs);
2050
2051 return ret;
2052}
2053
2054PyDoc_STRVAR(bz2_decompress__doc__,
2055"decompress(data) -> decompressed data\n\
2056\n\
2057Decompress data in one shot. If you want to decompress data sequentially,\n\
2058use an instance of BZ2Decompressor instead.\n\
2059");
2060
2061static PyObject *
2062bz2_decompress(PyObject *self, PyObject *args)
2063{
2064 char *data;
2065 int datasize;
2066 int bufsize = SMALLCHUNK;
2067 PyObject *ret;
2068 bz_stream _bzs;
2069 bz_stream *bzs = &_bzs;
2070 int bzerror;
2071
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002072 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002073 return NULL;
2074
2075 if (datasize == 0)
2076 return PyString_FromString("");
2077
2078 ret = PyString_FromStringAndSize(NULL, bufsize);
2079 if (!ret)
2080 return NULL;
2081
2082 memset(bzs, 0, sizeof(bz_stream));
2083
2084 bzs->next_in = data;
2085 bzs->avail_in = datasize;
2086 bzs->next_out = BUF(ret);
2087 bzs->avail_out = bufsize;
2088
2089 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2090 if (bzerror != BZ_OK) {
2091 Util_CatchBZ2Error(bzerror);
2092 Py_DECREF(ret);
2093 return NULL;
2094 }
Tim Peterse3228092002-11-09 04:21:44 +00002095
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002096 for (;;) {
2097 Py_BEGIN_ALLOW_THREADS
2098 bzerror = BZ2_bzDecompress(bzs);
2099 Py_END_ALLOW_THREADS
2100 if (bzerror == BZ_STREAM_END) {
2101 break;
2102 } else if (bzerror != BZ_OK) {
2103 BZ2_bzDecompressEnd(bzs);
2104 Util_CatchBZ2Error(bzerror);
2105 Py_DECREF(ret);
2106 return NULL;
2107 }
2108 if (bzs->avail_out == 0) {
2109 bufsize = Util_NewBufferSize(bufsize);
2110 if (_PyString_Resize(&ret, bufsize) < 0) {
2111 BZ2_bzDecompressEnd(bzs);
2112 Py_DECREF(ret);
2113 return NULL;
2114 }
2115 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2116 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2117 } else if (bzs->avail_in == 0) {
2118 BZ2_bzDecompressEnd(bzs);
2119 PyErr_SetString(PyExc_ValueError,
2120 "couldn't find end of stream");
2121 Py_DECREF(ret);
2122 return NULL;
2123 }
2124 }
2125
2126 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002127 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002128 BZ2_bzDecompressEnd(bzs);
2129
2130 return ret;
2131}
2132
2133static PyMethodDef bz2_methods[] = {
2134 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2135 bz2_compress__doc__},
2136 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2137 bz2_decompress__doc__},
2138 {NULL, NULL} /* sentinel */
2139};
2140
2141/* ===================================================================== */
2142/* Initialization function. */
2143
2144PyDoc_STRVAR(bz2__doc__,
2145"The python bz2 module provides a comprehensive interface for\n\
2146the bz2 compression library. It implements a complete file\n\
2147interface, one shot (de)compression functions, and types for\n\
2148sequential (de)compression.\n\
2149");
2150
Neal Norwitz21d896c2003-07-01 20:15:21 +00002151PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002152initbz2(void)
2153{
2154 PyObject *m;
2155
2156 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002157 BZ2Comp_Type.ob_type = &PyType_Type;
2158 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002159
2160 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2161
2162 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2163
2164 Py_INCREF(&BZ2File_Type);
2165 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2166
2167 Py_INCREF(&BZ2Comp_Type);
2168 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2169
2170 Py_INCREF(&BZ2Decomp_Type);
2171 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2172}