blob: b9874eb07e8949c0ef5e8c4db2b3be8e25d1c7c8 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* Raw character buffer of a Python string object.  The argument is
 * parenthesized so that expressions such as BUF(a ? b : c) cast the
 * whole expression rather than only its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type check against BZ2File_Type. */
#define BZ2FileObject_Check(v)	((v)->ob_type == &BZ2File_Type)
33
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000034
#ifdef BZ_CONFIG_ERROR

/* The stream exposes its output counter as two 32-bit halves
 * (total_out_hi32 / total_out_lo32).  Combine them when a 64-bit
 * integer type is available, otherwise fall back to the low half.
 * BZS_TOTAL_OUT expands to a plain expression: no trailing semicolon,
 * and the argument is parenthesized so any pointer expression works. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
	((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Without BZ_CONFIG_ERROR (presumably an older bzlib -- confirm), map
 * the BZ2_-prefixed names used in this file onto the unprefixed ones. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
66
67
/* Per-object lock helpers.  With thread support they take and release
 * the object's `lock` member (blocking acquire); without it they
 * expand to nothing.  The argument is parenthesized so that any
 * pointer expression may be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
75
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000076/* Bits in f_newlinetypes */
77#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
78#define NEWLINE_CR 1 /* \r newline seen */
79#define NEWLINE_LF 2 /* \n newline seen */
80#define NEWLINE_CRLF 4 /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000081
82/* ===================================================================== */
83/* Structure definitions. */
84
85typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000086 PyObject_HEAD
87 PyObject *file;
88
89 char* f_buf; /* Allocated readahead buffer */
90 char* f_bufend; /* Points after last occupied position */
91 char* f_bufptr; /* Current buffer position */
92
93 int f_softspace; /* Flag used by 'print' command */
94
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000095 int f_univ_newline; /* Handle any newline convention */
96 int f_newlinetypes; /* Types of newlines seen */
97 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000099 BZFILE *fp;
100 int mode;
101 long pos;
102 long size;
103#ifdef WITH_THREAD
104 PyThread_type_lock lock;
105#endif
106} BZ2FileObject;
107
108typedef struct {
109 PyObject_HEAD
110 bz_stream bzs;
111 int running;
112#ifdef WITH_THREAD
113 PyThread_type_lock lock;
114#endif
115} BZ2CompObject;
116
117typedef struct {
118 PyObject_HEAD
119 bz_stream bzs;
120 int running;
121 PyObject *unused_data;
122#ifdef WITH_THREAD
123 PyThread_type_lock lock;
124#endif
125} BZ2DecompObject;
126
127/* ===================================================================== */
128/* Utility functions. */
129
130static int
131Util_CatchBZ2Error(int bzerror)
132{
133 int ret = 0;
134 switch(bzerror) {
135 case BZ_OK:
136 case BZ_STREAM_END:
137 break;
138
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000139#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000140 case BZ_CONFIG_ERROR:
141 PyErr_SetString(PyExc_SystemError,
142 "the bz2 library was not compiled "
143 "correctly");
144 ret = 1;
145 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000146#endif
Tim Peterse3228092002-11-09 04:21:44 +0000147
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000148 case BZ_PARAM_ERROR:
149 PyErr_SetString(PyExc_ValueError,
150 "the bz2 library has received wrong "
151 "parameters");
152 ret = 1;
153 break;
Tim Peterse3228092002-11-09 04:21:44 +0000154
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000155 case BZ_MEM_ERROR:
156 PyErr_NoMemory();
157 ret = 1;
158 break;
159
160 case BZ_DATA_ERROR:
161 case BZ_DATA_ERROR_MAGIC:
162 PyErr_SetString(PyExc_IOError, "invalid data stream");
163 ret = 1;
164 break;
165
166 case BZ_IO_ERROR:
167 PyErr_SetString(PyExc_IOError, "unknown IO error");
168 ret = 1;
169 break;
170
171 case BZ_UNEXPECTED_EOF:
172 PyErr_SetString(PyExc_EOFError,
173 "compressed file ended before the "
174 "logical end-of-stream was detected");
175 ret = 1;
176 break;
177
178 case BZ_SEQUENCE_ERROR:
179 PyErr_SetString(PyExc_RuntimeError,
180 "wrong sequence of bz2 library "
181 "commands used");
182 ret = 1;
183 break;
184 }
185 return ret;
186}
187
/* SMALLCHUNK: linear growth step for small buffers, at least 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK: linear growth step for large buffers. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to use after `currentsize`:
 *   - up to SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	size_t step;

	if (currentsize <= SMALLCHUNK)
		step = SMALLCHUNK;
	else if (currentsize <= BIGCHUNK)
		step = currentsize;
	else
		step = BIGCHUNK;
	return currentsize + step;
}
214
215/* This is a hacked version of Python's fileobject.c:get_line(). */
216static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000217Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000218{
219 char c;
220 char *buf, *end;
221 size_t total_v_size; /* total # of slots in buffer */
222 size_t used_v_size; /* # used slots in buffer */
223 size_t increment; /* amount to increment the buffer */
224 PyObject *v;
225 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000226 int newlinetypes = f->f_newlinetypes;
227 int skipnextlf = f->f_skipnextlf;
228 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000229
230 total_v_size = n > 0 ? n : 100;
231 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
232 if (v == NULL)
233 return NULL;
234
235 buf = BUF(v);
236 end = buf + total_v_size;
237
238 for (;;) {
239 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000240 if (univ_newline) {
241 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000242 BZ2_bzRead(&bzerror, f->fp, &c, 1);
243 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000244 if (bzerror != BZ_OK || buf == end)
245 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000246 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000247 skipnextlf = 0;
248 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000249 /* Seeing a \n here with
250 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251 * saw a \r before.
252 */
253 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000254 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000255 &c, 1);
256 if (bzerror != BZ_OK)
257 break;
258 } else {
259 newlinetypes |= NEWLINE_CR;
260 }
261 }
262 if (c == '\r') {
263 skipnextlf = 1;
264 c = '\n';
265 } else if ( c == '\n')
266 newlinetypes |= NEWLINE_LF;
267 *buf++ = c;
268 if (c == '\n') break;
269 }
270 if (bzerror == BZ_STREAM_END && skipnextlf)
271 newlinetypes |= NEWLINE_CR;
272 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000273 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000274 BZ2_bzRead(&bzerror, f->fp, &c, 1);
275 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000276 *buf++ = c;
277 } while (bzerror == BZ_OK && c != '\n' && buf != end);
278 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000279 f->f_newlinetypes = newlinetypes;
280 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000281 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000282 f->size = f->pos;
283 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 break;
285 } else if (bzerror != BZ_OK) {
286 Util_CatchBZ2Error(bzerror);
287 Py_DECREF(v);
288 return NULL;
289 }
290 if (c == '\n')
291 break;
292 /* Must be because buf == end */
293 if (n > 0)
294 break;
295 used_v_size = total_v_size;
296 increment = total_v_size >> 2; /* mild exponential growth */
297 total_v_size += increment;
298 if (total_v_size > INT_MAX) {
299 PyErr_SetString(PyExc_OverflowError,
300 "line is longer than a Python string can hold");
301 Py_DECREF(v);
302 return NULL;
303 }
304 if (_PyString_Resize(&v, total_v_size) < 0)
305 return NULL;
306 buf = BUF(v) + used_v_size;
307 end = BUF(v) + total_v_size;
308 }
309
310 used_v_size = buf - BUF(v);
311 if (used_v_size != total_v_size)
312 _PyString_Resize(&v, used_v_size);
313 return v;
314}
315
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000316/* This is a hacked version of Python's
317 * fileobject.c:Py_UniversalNewlineFread(). */
318size_t
319Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000320 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000321{
322 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000323 int newlinetypes, skipnextlf;
324
325 assert(buf != NULL);
326 assert(stream != NULL);
327
328 if (!f->f_univ_newline)
329 return BZ2_bzRead(bzerror, stream, buf, n);
330
331 newlinetypes = f->f_newlinetypes;
332 skipnextlf = f->f_skipnextlf;
333
334 /* Invariant: n is the number of bytes remaining to be filled
335 * in the buffer.
336 */
337 while (n) {
338 size_t nread;
339 int shortread;
340 char *src = dst;
341
342 nread = BZ2_bzRead(bzerror, stream, dst, n);
343 assert(nread <= n);
344 n -= nread; /* assuming 1 byte out for each in; will adjust */
345 shortread = n != 0; /* true iff EOF or error */
346 while (nread--) {
347 char c = *src++;
348 if (c == '\r') {
349 /* Save as LF and set flag to skip next LF. */
350 *dst++ = '\n';
351 skipnextlf = 1;
352 }
353 else if (skipnextlf && c == '\n') {
354 /* Skip LF, and remember we saw CR LF. */
355 skipnextlf = 0;
356 newlinetypes |= NEWLINE_CRLF;
357 ++n;
358 }
359 else {
360 /* Normal char to be stored in buffer. Also
361 * update the newlinetypes flag if either this
362 * is an LF or the previous char was a CR.
363 */
364 if (c == '\n')
365 newlinetypes |= NEWLINE_LF;
366 else if (skipnextlf)
367 newlinetypes |= NEWLINE_CR;
368 *dst++ = c;
369 skipnextlf = 0;
370 }
371 }
372 if (shortread) {
373 /* If this is EOF, update type flags. */
374 if (skipnextlf && *bzerror == BZ_STREAM_END)
375 newlinetypes |= NEWLINE_CR;
376 break;
377 }
378 }
379 f->f_newlinetypes = newlinetypes;
380 f->f_skipnextlf = skipnextlf;
381 return dst - buf;
382}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000383
384/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
385static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000386Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000387{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000388 if (f->f_buf != NULL) {
389 PyMem_Free(f->f_buf);
390 f->f_buf = NULL;
391 }
392}
393
394/* This is a hacked version of Python's fileobject.c:readahead(). */
395static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000396Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000397{
398 int chunksize;
399 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000400
401 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000402 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403 return 0;
404 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000405 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000406 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000407 if (f->mode == MODE_READ_EOF) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000408 return -1;
409 }
410 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
411 return -1;
412 }
413 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000414 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
415 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000416 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000417 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000418 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 f->size = f->pos;
420 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000421 } else if (bzerror != BZ_OK) {
422 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000424 return -1;
425 }
426 f->f_bufptr = f->f_buf;
427 f->f_bufend = f->f_buf + chunksize;
428 return 0;
429}
430
431/* This is a hacked version of Python's
432 * fileobject.c:readahead_get_line_skip(). */
433static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000434Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000436 PyStringObject* s;
437 char *bufptr;
438 char *buf;
439 int len;
440
441 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000442 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000443 return NULL;
444
445 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000446 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000447 return (PyStringObject *)
448 PyString_FromStringAndSize(NULL, skip);
449 bufptr = memchr(f->f_bufptr, '\n', len);
450 if (bufptr != NULL) {
451 bufptr++; /* Count the '\n' */
452 len = bufptr - f->f_bufptr;
453 s = (PyStringObject *)
454 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000455 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000456 return NULL;
457 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
458 f->f_bufptr = bufptr;
459 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000460 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 } else {
462 bufptr = f->f_bufptr;
463 buf = f->f_buf;
464 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000465 s = Util_ReadAheadGetLineSkip(f, skip+len,
466 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 if (s == NULL) {
468 PyMem_Free(buf);
469 return NULL;
470 }
471 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
472 PyMem_Free(buf);
473 }
474 return s;
475}
476
477/* ===================================================================== */
478/* Methods of BZ2File. */
479
480PyDoc_STRVAR(BZ2File_read__doc__,
481"read([size]) -> string\n\
482\n\
483Read at most size uncompressed bytes, returned as a string. If the size\n\
484argument is negative or omitted, read until EOF is reached.\n\
485");
486
487/* This is a hacked version of Python's fileobject.c:file_read(). */
488static PyObject *
489BZ2File_read(BZ2FileObject *self, PyObject *args)
490{
491 long bytesrequested = -1;
492 size_t bytesread, buffersize, chunksize;
493 int bzerror;
494 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000495
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000496 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
497 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000498
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000499 ACQUIRE_LOCK(self);
500 switch (self->mode) {
501 case MODE_READ:
502 break;
503 case MODE_READ_EOF:
504 ret = PyString_FromString("");
505 goto cleanup;
506 case MODE_CLOSED:
507 PyErr_SetString(PyExc_ValueError,
508 "I/O operation on closed file");
509 goto cleanup;
510 default:
511 PyErr_SetString(PyExc_IOError,
512 "file is not ready for reading");
513 goto cleanup;
514 }
515
516 if (bytesrequested < 0)
517 buffersize = Util_NewBufferSize((size_t)0);
518 else
519 buffersize = bytesrequested;
520 if (buffersize > INT_MAX) {
521 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000522 "requested number of bytes is "
523 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000524 goto cleanup;
525 }
526 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
527 if (ret == NULL)
528 goto cleanup;
529 bytesread = 0;
530
531 for (;;) {
532 Py_BEGIN_ALLOW_THREADS
533 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
534 BUF(ret)+bytesread,
535 buffersize-bytesread,
536 self);
537 self->pos += chunksize;
538 Py_END_ALLOW_THREADS
539 bytesread += chunksize;
540 if (bzerror == BZ_STREAM_END) {
541 self->size = self->pos;
542 self->mode = MODE_READ_EOF;
543 break;
544 } else if (bzerror != BZ_OK) {
545 Util_CatchBZ2Error(bzerror);
546 Py_DECREF(ret);
547 ret = NULL;
548 goto cleanup;
549 }
550 if (bytesrequested < 0) {
551 buffersize = Util_NewBufferSize(buffersize);
552 if (_PyString_Resize(&ret, buffersize) < 0)
553 goto cleanup;
554 } else {
555 break;
556 }
557 }
558 if (bytesread != buffersize)
559 _PyString_Resize(&ret, bytesread);
560
561cleanup:
562 RELEASE_LOCK(self);
563 return ret;
564}
565
566PyDoc_STRVAR(BZ2File_readline__doc__,
567"readline([size]) -> string\n\
568\n\
569Return the next line from the file, as a string, retaining newline.\n\
570A non-negative size argument will limit the maximum number of bytes to\n\
571return (an incomplete line may be returned then). Return an empty\n\
572string at EOF.\n\
573");
574
575static PyObject *
576BZ2File_readline(BZ2FileObject *self, PyObject *args)
577{
578 PyObject *ret = NULL;
579 int sizehint = -1;
580
581 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
582 return NULL;
583
584 ACQUIRE_LOCK(self);
585 switch (self->mode) {
586 case MODE_READ:
587 break;
588 case MODE_READ_EOF:
589 ret = PyString_FromString("");
590 goto cleanup;
591 case MODE_CLOSED:
592 PyErr_SetString(PyExc_ValueError,
593 "I/O operation on closed file");
594 goto cleanup;
595 default:
596 PyErr_SetString(PyExc_IOError,
597 "file is not ready for reading");
598 goto cleanup;
599 }
600
601 if (sizehint == 0)
602 ret = PyString_FromString("");
603 else
604 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
605
606cleanup:
607 RELEASE_LOCK(self);
608 return ret;
609}
610
611PyDoc_STRVAR(BZ2File_readlines__doc__,
612"readlines([size]) -> list\n\
613\n\
614Call readline() repeatedly and return a list of lines read.\n\
615The optional size argument, if given, is an approximate bound on the\n\
616total number of bytes in the lines returned.\n\
617");
618
619/* This is a hacked version of Python's fileobject.c:file_readlines(). */
620static PyObject *
621BZ2File_readlines(BZ2FileObject *self, PyObject *args)
622{
623 long sizehint = 0;
624 PyObject *list = NULL;
625 PyObject *line;
626 char small_buffer[SMALLCHUNK];
627 char *buffer = small_buffer;
628 size_t buffersize = SMALLCHUNK;
629 PyObject *big_buffer = NULL;
630 size_t nfilled = 0;
631 size_t nread;
632 size_t totalread = 0;
633 char *p, *q, *end;
634 int err;
635 int shortread = 0;
636 int bzerror;
637
638 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
639 return NULL;
640
641 ACQUIRE_LOCK(self);
642 switch (self->mode) {
643 case MODE_READ:
644 break;
645 case MODE_READ_EOF:
646 list = PyList_New(0);
647 goto cleanup;
648 case MODE_CLOSED:
649 PyErr_SetString(PyExc_ValueError,
650 "I/O operation on closed file");
651 goto cleanup;
652 default:
653 PyErr_SetString(PyExc_IOError,
654 "file is not ready for reading");
655 goto cleanup;
656 }
657
658 if ((list = PyList_New(0)) == NULL)
659 goto cleanup;
660
661 for (;;) {
662 Py_BEGIN_ALLOW_THREADS
663 nread = Util_UnivNewlineRead(&bzerror, self->fp,
664 buffer+nfilled,
665 buffersize-nfilled, self);
666 self->pos += nread;
667 Py_END_ALLOW_THREADS
668 if (bzerror == BZ_STREAM_END) {
669 self->size = self->pos;
670 self->mode = MODE_READ_EOF;
671 if (nread == 0) {
672 sizehint = 0;
673 break;
674 }
675 shortread = 1;
676 } else if (bzerror != BZ_OK) {
677 Util_CatchBZ2Error(bzerror);
678 error:
679 Py_DECREF(list);
680 list = NULL;
681 goto cleanup;
682 }
683 totalread += nread;
684 p = memchr(buffer+nfilled, '\n', nread);
685 if (p == NULL) {
686 /* Need a larger buffer to fit this line */
687 nfilled += nread;
688 buffersize *= 2;
689 if (buffersize > INT_MAX) {
690 PyErr_SetString(PyExc_OverflowError,
691 "line is longer than a Python string can hold");
692 goto error;
693 }
694 if (big_buffer == NULL) {
695 /* Create the big buffer */
696 big_buffer = PyString_FromStringAndSize(
697 NULL, buffersize);
698 if (big_buffer == NULL)
699 goto error;
700 buffer = PyString_AS_STRING(big_buffer);
701 memcpy(buffer, small_buffer, nfilled);
702 }
703 else {
704 /* Grow the big buffer */
705 _PyString_Resize(&big_buffer, buffersize);
706 buffer = PyString_AS_STRING(big_buffer);
707 }
708 continue;
709 }
710 end = buffer+nfilled+nread;
711 q = buffer;
712 do {
713 /* Process complete lines */
714 p++;
715 line = PyString_FromStringAndSize(q, p-q);
716 if (line == NULL)
717 goto error;
718 err = PyList_Append(list, line);
719 Py_DECREF(line);
720 if (err != 0)
721 goto error;
722 q = p;
723 p = memchr(q, '\n', end-q);
724 } while (p != NULL);
725 /* Move the remaining incomplete line to the start */
726 nfilled = end-q;
727 memmove(buffer, q, nfilled);
728 if (sizehint > 0)
729 if (totalread >= (size_t)sizehint)
730 break;
731 if (shortread) {
732 sizehint = 0;
733 break;
734 }
735 }
736 if (nfilled != 0) {
737 /* Partial last line */
738 line = PyString_FromStringAndSize(buffer, nfilled);
739 if (line == NULL)
740 goto error;
741 if (sizehint > 0) {
742 /* Need to complete the last line */
743 PyObject *rest = Util_GetLine(self, 0);
744 if (rest == NULL) {
745 Py_DECREF(line);
746 goto error;
747 }
748 PyString_Concat(&line, rest);
749 Py_DECREF(rest);
750 if (line == NULL)
751 goto error;
752 }
753 err = PyList_Append(list, line);
754 Py_DECREF(line);
755 if (err != 0)
756 goto error;
757 }
758
759 cleanup:
760 RELEASE_LOCK(self);
761 if (big_buffer) {
762 Py_DECREF(big_buffer);
763 }
764 return list;
765}
766
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000767PyDoc_STRVAR(BZ2File_xreadlines__doc__,
768"xreadlines() -> self\n\
769\n\
770For backward compatibility. BZ2File objects now include the performance\n\
771optimizations previously implemented in the xreadlines module.\n\
772");
773
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000774PyDoc_STRVAR(BZ2File_write__doc__,
775"write(data) -> None\n\
776\n\
777Write the 'data' string to file. Note that due to buffering, close() may\n\
778be needed before the file on disk reflects the data written.\n\
779");
780
781/* This is a hacked version of Python's fileobject.c:file_write(). */
782static PyObject *
783BZ2File_write(BZ2FileObject *self, PyObject *args)
784{
785 PyObject *ret = NULL;
786 char *buf;
787 int len;
788 int bzerror;
789
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000790 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000791 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000792
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000793 ACQUIRE_LOCK(self);
794 switch (self->mode) {
795 case MODE_WRITE:
796 break;
Tim Peterse3228092002-11-09 04:21:44 +0000797
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000798 case MODE_CLOSED:
799 PyErr_SetString(PyExc_ValueError,
800 "I/O operation on closed file");
801 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000802
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000803 default:
804 PyErr_SetString(PyExc_IOError,
805 "file is not ready for writing");
806 goto cleanup;;
807 }
808
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000809 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810
811 Py_BEGIN_ALLOW_THREADS
812 BZ2_bzWrite (&bzerror, self->fp, buf, len);
813 self->pos += len;
814 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000815
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816 if (bzerror != BZ_OK) {
817 Util_CatchBZ2Error(bzerror);
818 goto cleanup;
819 }
Tim Peterse3228092002-11-09 04:21:44 +0000820
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000821 Py_INCREF(Py_None);
822 ret = Py_None;
823
824cleanup:
825 RELEASE_LOCK(self);
826 return ret;
827}
828
829PyDoc_STRVAR(BZ2File_writelines__doc__,
830"writelines(sequence_of_strings) -> None\n\
831\n\
832Write the sequence of strings to the file. Note that newlines are not\n\
833added. The sequence can be any iterable object producing strings. This is\n\
834equivalent to calling write() for each string.\n\
835");
836
837/* This is a hacked version of Python's fileobject.c:file_writelines(). */
838static PyObject *
839BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
840{
841#define CHUNKSIZE 1000
842 PyObject *list = NULL;
843 PyObject *iter = NULL;
844 PyObject *ret = NULL;
845 PyObject *line;
846 int i, j, index, len, islist;
847 int bzerror;
848
849 ACQUIRE_LOCK(self);
850 islist = PyList_Check(seq);
851 if (!islist) {
852 iter = PyObject_GetIter(seq);
853 if (iter == NULL) {
854 PyErr_SetString(PyExc_TypeError,
855 "writelines() requires an iterable argument");
856 goto error;
857 }
858 list = PyList_New(CHUNKSIZE);
859 if (list == NULL)
860 goto error;
861 }
862
863 /* Strategy: slurp CHUNKSIZE lines into a private list,
864 checking that they are all strings, then write that list
865 without holding the interpreter lock, then come back for more. */
866 for (index = 0; ; index += CHUNKSIZE) {
867 if (islist) {
868 Py_XDECREF(list);
869 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
870 if (list == NULL)
871 goto error;
872 j = PyList_GET_SIZE(list);
873 }
874 else {
875 for (j = 0; j < CHUNKSIZE; j++) {
876 line = PyIter_Next(iter);
877 if (line == NULL) {
878 if (PyErr_Occurred())
879 goto error;
880 break;
881 }
882 PyList_SetItem(list, j, line);
883 }
884 }
885 if (j == 0)
886 break;
887
888 /* Check that all entries are indeed strings. If not,
889 apply the same rules as for file.write() and
890 convert the rets to strings. This is slow, but
891 seems to be the only way since all conversion APIs
892 could potentially execute Python code. */
893 for (i = 0; i < j; i++) {
894 PyObject *v = PyList_GET_ITEM(list, i);
895 if (!PyString_Check(v)) {
896 const char *buffer;
897 int len;
898 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
899 PyErr_SetString(PyExc_TypeError,
900 "writelines() "
901 "argument must be "
902 "a sequence of "
903 "strings");
904 goto error;
905 }
906 line = PyString_FromStringAndSize(buffer,
907 len);
908 if (line == NULL)
909 goto error;
910 Py_DECREF(v);
911 PyList_SET_ITEM(list, i, line);
912 }
913 }
914
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000915 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000916
917 /* Since we are releasing the global lock, the
918 following code may *not* execute Python code. */
919 Py_BEGIN_ALLOW_THREADS
920 for (i = 0; i < j; i++) {
921 line = PyList_GET_ITEM(list, i);
922 len = PyString_GET_SIZE(line);
923 BZ2_bzWrite (&bzerror, self->fp,
924 PyString_AS_STRING(line), len);
925 if (bzerror != BZ_OK) {
926 Py_BLOCK_THREADS
927 Util_CatchBZ2Error(bzerror);
928 goto error;
929 }
930 }
931 Py_END_ALLOW_THREADS
932
933 if (j < CHUNKSIZE)
934 break;
935 }
936
937 Py_INCREF(Py_None);
938 ret = Py_None;
939
940 error:
941 RELEASE_LOCK(self);
942 Py_XDECREF(list);
943 Py_XDECREF(iter);
944 return ret;
945#undef CHUNKSIZE
946}
947
948PyDoc_STRVAR(BZ2File_seek__doc__,
949"seek(offset [, whence]) -> None\n\
950\n\
951Move to new file position. Argument offset is a byte count. Optional\n\
952argument whence defaults to 0 (offset from start of file, offset\n\
953should be >= 0); other values are 1 (move relative to current position,\n\
954positive or negative), and 2 (move relative to end of file, usually\n\
955negative, although many platforms allow seeking beyond the end of a file).\n\
956\n\
957Note that seeking of bz2 files is emulated, and depending on the parameters\n\
958the operation may be extremely slow.\n\
959");
960
961static PyObject *
962BZ2File_seek(BZ2FileObject *self, PyObject *args)
963{
964 int where = 0;
965 long offset;
966 char small_buffer[SMALLCHUNK];
967 char *buffer = small_buffer;
968 size_t buffersize = SMALLCHUNK;
969 int bytesread = 0;
970 int readsize;
971 int chunksize;
972 int bzerror;
973 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000974 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000975
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000976 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
977 return NULL;
978
979 ACQUIRE_LOCK(self);
980 Util_DropReadAhead(self);
981 switch (self->mode) {
982 case MODE_READ:
983 case MODE_READ_EOF:
984 break;
Tim Peterse3228092002-11-09 04:21:44 +0000985
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986 case MODE_CLOSED:
987 PyErr_SetString(PyExc_ValueError,
988 "I/O operation on closed file");
989 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000990
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000991 default:
992 PyErr_SetString(PyExc_IOError,
993 "seek works only while reading");
994 goto cleanup;;
995 }
996
997 if (offset < 0) {
998 if (where == 1) {
999 offset = self->pos + offset;
1000 rewind = 1;
1001 } else if (where == 2) {
1002 if (self->size == -1) {
1003 assert(self->mode != MODE_READ_EOF);
1004 for (;;) {
1005 Py_BEGIN_ALLOW_THREADS
1006 chunksize = Util_UnivNewlineRead(
1007 &bzerror, self->fp,
1008 buffer, buffersize,
1009 self);
1010 self->pos += chunksize;
1011 Py_END_ALLOW_THREADS
1012
1013 bytesread += chunksize;
1014 if (bzerror == BZ_STREAM_END) {
1015 break;
1016 } else if (bzerror != BZ_OK) {
1017 Util_CatchBZ2Error(bzerror);
1018 goto cleanup;
1019 }
1020 }
1021 self->mode = MODE_READ_EOF;
1022 self->size = self->pos;
1023 bytesread = 0;
1024 }
1025 offset = self->size + offset;
1026 if (offset >= self->pos)
1027 offset -= self->pos;
1028 else
1029 rewind = 1;
1030 }
1031 if (offset < 0)
1032 offset = 0;
1033 } else if (where == 0) {
1034 if (offset >= self->pos)
1035 offset -= self->pos;
1036 else
1037 rewind = 1;
1038 }
1039
1040 if (rewind) {
1041 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001042 if (bzerror != BZ_OK) {
1043 Util_CatchBZ2Error(bzerror);
1044 goto cleanup;
1045 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001046 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001047 if (!ret)
1048 goto cleanup;
1049 Py_DECREF(ret);
1050 ret = NULL;
1051 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001052 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001053 0, 0, NULL, 0);
1054 if (bzerror != BZ_OK) {
1055 Util_CatchBZ2Error(bzerror);
1056 goto cleanup;
1057 }
1058 self->mode = MODE_READ;
1059 } else if (self->mode == MODE_READ_EOF) {
1060 goto exit;
1061 }
1062
1063 if (offset == 0)
1064 goto exit;
1065
1066 /* Before getting here, offset must be set to the number of bytes
1067 * to walk forward. */
1068 for (;;) {
Tim Petersa17c0c42002-11-09 04:23:31 +00001069 if ((size_t)offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 readsize = buffersize;
1071 else
1072 readsize = offset-bytesread;
1073 Py_BEGIN_ALLOW_THREADS
1074 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1075 buffer, readsize, self);
1076 self->pos += chunksize;
1077 Py_END_ALLOW_THREADS
1078 bytesread += chunksize;
1079 if (bzerror == BZ_STREAM_END) {
1080 self->size = self->pos;
1081 self->mode = MODE_READ_EOF;
1082 break;
1083 } else if (bzerror != BZ_OK) {
1084 Util_CatchBZ2Error(bzerror);
1085 goto cleanup;
1086 }
1087 if (bytesread == offset)
1088 break;
1089 }
1090
1091exit:
1092 Py_INCREF(Py_None);
1093 ret = Py_None;
1094
1095cleanup:
1096 RELEASE_LOCK(self);
1097 return ret;
1098}
1099
1100PyDoc_STRVAR(BZ2File_tell__doc__,
1101"tell() -> int\n\
1102\n\
1103Return the current file position, an integer (may be a long integer).\n\
1104");
1105
1106static PyObject *
1107BZ2File_tell(BZ2FileObject *self, PyObject *args)
1108{
1109 PyObject *ret = NULL;
1110
1111 if (self->mode == MODE_CLOSED) {
1112 PyErr_SetString(PyExc_ValueError,
1113 "I/O operation on closed file");
1114 goto cleanup;
1115 }
1116
1117 ret = PyInt_FromLong(self->pos);
1118
1119cleanup:
1120 return ret;
1121}
1122
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001123PyDoc_STRVAR(BZ2File_close__doc__,
1124"close() -> None or (perhaps) an integer\n\
1125\n\
1126Close the file. Sets data attribute .closed to true. A closed file\n\
1127cannot be used for further I/O operations. close() may be called more\n\
1128than once without error.\n\
1129");
1130
1131static PyObject *
1132BZ2File_close(BZ2FileObject *self)
1133{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001134 PyObject *ret = NULL;
1135 int bzerror = BZ_OK;
1136
1137 ACQUIRE_LOCK(self);
1138 switch (self->mode) {
1139 case MODE_READ:
1140 case MODE_READ_EOF:
1141 BZ2_bzReadClose(&bzerror, self->fp);
1142 break;
1143 case MODE_WRITE:
1144 BZ2_bzWriteClose(&bzerror, self->fp,
1145 0, NULL, NULL);
1146 break;
1147 }
1148 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001149 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001150 if (bzerror != BZ_OK) {
1151 Util_CatchBZ2Error(bzerror);
1152 Py_XDECREF(ret);
1153 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154 }
1155
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001156 RELEASE_LOCK(self);
1157 return ret;
1158}
1159
/* Forward declaration: BZ2File_getiter is defined further down, after the
 * method table that references it. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table for BZ2File objects: Python-level name, C implementation,
 * calling convention and docstring.
 *
 * NOTE(review): "xreadlines" is registered as METH_VARARGS although
 * BZ2File_getiter only takes self, so presumably any arguments passed
 * from Python are silently ignored; METH_NOARGS may have been intended --
 * confirm before changing. */
static PyMethodDef BZ2File_methods[] = {
	{"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
	{"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
	{"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
	{"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
	{"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
	{"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
	{"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
	{"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
	{"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
	{NULL,		NULL}		/* sentinel */
};
1174
1175
1176/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001177/* Getters and setters of BZ2File. */
1178
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001179/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1180static PyObject *
1181BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1182{
1183 switch (self->f_newlinetypes) {
1184 case NEWLINE_UNKNOWN:
1185 Py_INCREF(Py_None);
1186 return Py_None;
1187 case NEWLINE_CR:
1188 return PyString_FromString("\r");
1189 case NEWLINE_LF:
1190 return PyString_FromString("\n");
1191 case NEWLINE_CR|NEWLINE_LF:
1192 return Py_BuildValue("(ss)", "\r", "\n");
1193 case NEWLINE_CRLF:
1194 return PyString_FromString("\r\n");
1195 case NEWLINE_CR|NEWLINE_CRLF:
1196 return Py_BuildValue("(ss)", "\r", "\r\n");
1197 case NEWLINE_LF|NEWLINE_CRLF:
1198 return Py_BuildValue("(ss)", "\n", "\r\n");
1199 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1200 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1201 default:
1202 PyErr_Format(PyExc_SystemError,
1203 "Unknown newlines value 0x%x\n",
1204 self->f_newlinetypes);
1205 return NULL;
1206 }
1207}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001208
1209static PyObject *
1210BZ2File_get_closed(BZ2FileObject *self, void *closure)
1211{
1212 return PyInt_FromLong(self->mode == MODE_CLOSED);
1213}
1214
1215static PyObject *
1216BZ2File_get_mode(BZ2FileObject *self, void *closure)
1217{
1218 return PyObject_GetAttrString(self->file, "mode");
1219}
1220
1221static PyObject *
1222BZ2File_get_name(BZ2FileObject *self, void *closure)
1223{
1224 return PyObject_GetAttrString(self->file, "name");
1225}
1226
/* Computed attributes of BZ2File.  Every entry has a getter and a NULL
 * setter, so all four attributes are read-only from Python. */
static PyGetSetDef BZ2File_getset[] = {
	{"closed", (getter)BZ2File_get_closed, NULL,
			"True if the file is closed"},
	{"newlines", (getter)BZ2File_get_newlines, NULL,
			"end-of-line convention used in this file"},
	{"mode", (getter)BZ2File_get_mode, NULL,
			"file mode ('r', 'w', or 'U')"},
	{"name", (getter)BZ2File_get_name, NULL,
			"file name"},
	{NULL}	/* Sentinel */
};
1238
1239
1240/* ===================================================================== */
1241/* Members of BZ2File_Type. */
1242
/* OFF(x) is the byte offset of field x inside BZ2FileObject; it is
 * redefined here because the macro name is reused for other object
 * types in this file. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields of BZ2File exposed directly as Python attributes. */
static PyMemberDef BZ2File_members[] = {
	{"softspace",	T_INT,		OFF(f_softspace), 0,
	 "flag indicating that a space needs to be printed; used by print"},
	{NULL}	/* Sentinel */
};
1251
1252/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001253/* Slot definitions for BZ2File_Type. */
1254
1255static int
1256BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1257{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001258 static char *kwlist[] = {"filename", "mode", "buffering",
1259 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001260 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001261 char *mode = "r";
1262 int buffering = -1;
1263 int compresslevel = 9;
1264 int bzerror;
1265 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001266
1267 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001268
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001269 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1270 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001271 &compresslevel))
1272 return -1;
1273
1274 if (compresslevel < 1 || compresslevel > 9) {
1275 PyErr_SetString(PyExc_ValueError,
1276 "compresslevel must be between 1 and 9");
1277 return -1;
1278 }
1279
1280 for (;;) {
1281 int error = 0;
1282 switch (*mode) {
1283 case 'r':
1284 case 'w':
1285 if (mode_char)
1286 error = 1;
1287 mode_char = *mode;
1288 break;
1289
1290 case 'b':
1291 break;
1292
1293 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001294 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001295 break;
1296
1297 default:
1298 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001299 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001300 }
1301 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001302 PyErr_Format(PyExc_ValueError,
1303 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001304 return -1;
1305 }
1306 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001307 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001308 break;
1309 }
1310
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001311 if (mode_char == 0) {
1312 mode_char = 'r';
1313 }
1314
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001315 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001316
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001317 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1318 name, mode, buffering);
1319 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001320 return -1;
1321
1322 /* From now on, we have stuff to dealloc, so jump to error label
1323 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001324
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001325#ifdef WITH_THREAD
1326 self->lock = PyThread_allocate_lock();
1327 if (!self->lock)
1328 goto error;
1329#endif
1330
1331 if (mode_char == 'r')
1332 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001333 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001334 0, 0, NULL, 0);
1335 else
1336 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001337 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001338 compresslevel, 0, 0);
1339
1340 if (bzerror != BZ_OK) {
1341 Util_CatchBZ2Error(bzerror);
1342 goto error;
1343 }
1344
1345 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1346
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001347 return 0;
1348
1349error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001350 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001351#ifdef WITH_THREAD
1352 if (self->lock)
1353 PyThread_free_lock(self->lock);
1354#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001355 return -1;
1356}
1357
1358static void
1359BZ2File_dealloc(BZ2FileObject *self)
1360{
1361 int bzerror;
1362#ifdef WITH_THREAD
1363 if (self->lock)
1364 PyThread_free_lock(self->lock);
1365#endif
1366 switch (self->mode) {
1367 case MODE_READ:
1368 case MODE_READ_EOF:
1369 BZ2_bzReadClose(&bzerror, self->fp);
1370 break;
1371 case MODE_WRITE:
1372 BZ2_bzWriteClose(&bzerror, self->fp,
1373 0, NULL, NULL);
1374 break;
1375 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001376 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001377 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001378 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001379}
1380
1381/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1382static PyObject *
1383BZ2File_getiter(BZ2FileObject *self)
1384{
1385 if (self->mode == MODE_CLOSED) {
1386 PyErr_SetString(PyExc_ValueError,
1387 "I/O operation on closed file");
1388 return NULL;
1389 }
1390 Py_INCREF((PyObject*)self);
1391 return (PyObject *)self;
1392}
1393
1394/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1395#define READAHEAD_BUFSIZE 8192
1396static PyObject *
1397BZ2File_iternext(BZ2FileObject *self)
1398{
1399 PyStringObject* ret;
1400 ACQUIRE_LOCK(self);
1401 if (self->mode == MODE_CLOSED) {
1402 PyErr_SetString(PyExc_ValueError,
1403 "I/O operation on closed file");
1404 return NULL;
1405 }
1406 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1407 RELEASE_LOCK(self);
1408 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1409 Py_XDECREF(ret);
1410 return NULL;
1411 }
1412 return (PyObject *)ret;
1413}
1414
1415/* ===================================================================== */
1416/* BZ2File_Type definition. */
1417
/* Docstring of the BZ2File type, built from two adjacent PyDoc_STR
 * pieces.
 *
 * NOTE(review): the signature line advertises buffering=0, but
 * BZ2File_init initialises its buffering variable to -1 before parsing
 * the arguments -- confirm which default is intended. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;
1438
/* Type object for "bz2.BZ2File".  Slots set to 0 are left unset; the
 * type supplies its own dealloc, iterator, method/member/getset tables
 * and init, and uses the generic attribute access, alloc and new
 * functions.  Py_TPFLAGS_BASETYPE allows subclassing. */
static PyTypeObject BZ2File_Type = {
	PyObject_HEAD_INIT(NULL)
	0,			/*ob_size*/
	"bz2.BZ2File",		/*tp_name*/
	sizeof(BZ2FileObject),	/*tp_basicsize*/
	0,			/*tp_itemsize*/
	(destructor)BZ2File_dealloc, /*tp_dealloc*/
	0,			/*tp_print*/
	0,			/*tp_getattr*/
	0,			/*tp_setattr*/
	0,			/*tp_compare*/
	0,			/*tp_repr*/
	0,			/*tp_as_number*/
	0,			/*tp_as_sequence*/
	0,			/*tp_as_mapping*/
	0,			/*tp_hash*/
	0,			/*tp_call*/
	0,			/*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2File__doc__,		/*tp_doc*/
	0,			/*tp_traverse*/
	0,			/*tp_clear*/
	0,			/*tp_richcompare*/
	0,			/*tp_weaklistoffset*/
	(getiterfunc)BZ2File_getiter, /*tp_iter*/
	(iternextfunc)BZ2File_iternext, /*tp_iternext*/
	BZ2File_methods,	/*tp_methods*/
	BZ2File_members,	/*tp_members*/
	BZ2File_getset,		/*tp_getset*/
	0,			/*tp_base*/
	0,			/*tp_dict*/
	0,			/*tp_descr_get*/
	0,			/*tp_descr_set*/
	0,			/*tp_dictoffset*/
	(initproc)BZ2File_init,	/*tp_init*/
	PyType_GenericAlloc,	/*tp_alloc*/
	PyType_GenericNew,	/*tp_new*/
	_PyObject_Del,		/*tp_free*/
	0,			/*tp_is_gc*/
};
1482
1483
1484/* ===================================================================== */
1485/* Methods of BZ2Comp. */
1486
1487PyDoc_STRVAR(BZ2Comp_compress__doc__,
1488"compress(data) -> string\n\
1489\n\
1490Provide more data to the compressor object. It will return chunks of\n\
1491compressed data whenever possible. When you've finished providing data\n\
1492to compress, call the flush() method to finish the compression process,\n\
1493and return what is left in the internal buffers.\n\
1494");
1495
1496static PyObject *
1497BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1498{
1499 char *data;
1500 int datasize;
1501 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001502 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001503 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001504 bz_stream *bzs = &self->bzs;
1505 int bzerror;
1506
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001507 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001508 return NULL;
1509
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001510 if (datasize == 0)
1511 return PyString_FromString("");
1512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001513 ACQUIRE_LOCK(self);
1514 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001515 PyErr_SetString(PyExc_ValueError,
1516 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001517 goto error;
1518 }
1519
1520 ret = PyString_FromStringAndSize(NULL, bufsize);
1521 if (!ret)
1522 goto error;
1523
1524 bzs->next_in = data;
1525 bzs->avail_in = datasize;
1526 bzs->next_out = BUF(ret);
1527 bzs->avail_out = bufsize;
1528
1529 totalout = BZS_TOTAL_OUT(bzs);
1530
1531 for (;;) {
1532 Py_BEGIN_ALLOW_THREADS
1533 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1534 Py_END_ALLOW_THREADS
1535 if (bzerror != BZ_RUN_OK) {
1536 Util_CatchBZ2Error(bzerror);
1537 goto error;
1538 }
1539 if (bzs->avail_out == 0) {
1540 bufsize = Util_NewBufferSize(bufsize);
1541 if (_PyString_Resize(&ret, bufsize) < 0) {
1542 BZ2_bzCompressEnd(bzs);
1543 goto error;
1544 }
1545 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1546 - totalout);
1547 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1548 } else if (bzs->avail_in == 0) {
1549 break;
1550 }
1551 }
1552
Tim Petersf29f0c62002-11-09 04:28:17 +00001553 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001554
1555 RELEASE_LOCK(self);
1556 return ret;
1557
1558error:
1559 RELEASE_LOCK(self);
1560 Py_XDECREF(ret);
1561 return NULL;
1562}
1563
1564PyDoc_STRVAR(BZ2Comp_flush__doc__,
1565"flush() -> string\n\
1566\n\
1567Finish the compression process and return what is left in internal buffers.\n\
1568You must not use the compressor object after calling this method.\n\
1569");
1570
1571static PyObject *
1572BZ2Comp_flush(BZ2CompObject *self)
1573{
1574 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001575 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001576 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001577 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001578 int bzerror;
1579
1580 ACQUIRE_LOCK(self);
1581 if (!self->running) {
1582 PyErr_SetString(PyExc_ValueError, "object was already "
1583 "flushed");
1584 goto error;
1585 }
1586 self->running = 0;
1587
1588 ret = PyString_FromStringAndSize(NULL, bufsize);
1589 if (!ret)
1590 goto error;
1591
1592 bzs->next_out = BUF(ret);
1593 bzs->avail_out = bufsize;
1594
1595 totalout = BZS_TOTAL_OUT(bzs);
1596
1597 for (;;) {
1598 Py_BEGIN_ALLOW_THREADS
1599 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1600 Py_END_ALLOW_THREADS
1601 if (bzerror == BZ_STREAM_END) {
1602 break;
1603 } else if (bzerror != BZ_FINISH_OK) {
1604 Util_CatchBZ2Error(bzerror);
1605 goto error;
1606 }
1607 if (bzs->avail_out == 0) {
1608 bufsize = Util_NewBufferSize(bufsize);
1609 if (_PyString_Resize(&ret, bufsize) < 0)
1610 goto error;
1611 bzs->next_out = BUF(ret);
1612 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1613 - totalout);
1614 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1615 }
1616 }
1617
1618 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001619 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001620
1621 RELEASE_LOCK(self);
1622 return ret;
1623
1624error:
1625 RELEASE_LOCK(self);
1626 Py_XDECREF(ret);
1627 return NULL;
1628}
1629
/* Method table for BZ2Compressor objects: compress(data) takes a
 * positional-argument tuple, flush() takes no arguments. */
static PyMethodDef BZ2Comp_methods[] = {
	{"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
	 BZ2Comp_compress__doc__},
	{"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
	 BZ2Comp_flush__doc__},
	{NULL,		NULL}		/* sentinel */
};
1637
1638
1639/* ===================================================================== */
1640/* Slot definitions for BZ2Comp_Type. */
1641
1642static int
1643BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1644{
1645 int compresslevel = 9;
1646 int bzerror;
1647 static char *kwlist[] = {"compresslevel", 0};
1648
1649 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1650 kwlist, &compresslevel))
1651 return -1;
1652
1653 if (compresslevel < 1 || compresslevel > 9) {
1654 PyErr_SetString(PyExc_ValueError,
1655 "compresslevel must be between 1 and 9");
1656 goto error;
1657 }
1658
1659#ifdef WITH_THREAD
1660 self->lock = PyThread_allocate_lock();
1661 if (!self->lock)
1662 goto error;
1663#endif
1664
1665 memset(&self->bzs, 0, sizeof(bz_stream));
1666 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1667 if (bzerror != BZ_OK) {
1668 Util_CatchBZ2Error(bzerror);
1669 goto error;
1670 }
1671
1672 self->running = 1;
1673
1674 return 0;
1675error:
1676#ifdef WITH_THREAD
1677 if (self->lock)
1678 PyThread_free_lock(self->lock);
1679#endif
1680 return -1;
1681}
1682
1683static void
1684BZ2Comp_dealloc(BZ2CompObject *self)
1685{
1686#ifdef WITH_THREAD
1687 if (self->lock)
1688 PyThread_free_lock(self->lock);
1689#endif
1690 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001691 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001692}
1693
1694
1695/* ===================================================================== */
1696/* BZ2Comp_Type definition. */
1697
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for "bz2.BZ2Compressor".  Slots set to 0 are left unset;
 * the type supplies its own dealloc, method table and init, and uses the
 * generic attribute access, alloc and new functions.
 * Py_TPFLAGS_BASETYPE allows subclassing. */
static PyTypeObject BZ2Comp_Type = {
	PyObject_HEAD_INIT(NULL)
	0,			/*ob_size*/
	"bz2.BZ2Compressor",	/*tp_name*/
	sizeof(BZ2CompObject),	/*tp_basicsize*/
	0,			/*tp_itemsize*/
	(destructor)BZ2Comp_dealloc, /*tp_dealloc*/
	0,			/*tp_print*/
	0,			/*tp_getattr*/
	0,			/*tp_setattr*/
	0,			/*tp_compare*/
	0,			/*tp_repr*/
	0,			/*tp_as_number*/
	0,			/*tp_as_sequence*/
	0,			/*tp_as_mapping*/
	0,			/*tp_hash*/
	0,			/*tp_call*/
	0,			/*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2Comp__doc__,		/*tp_doc*/
	0,			/*tp_traverse*/
	0,			/*tp_clear*/
	0,			/*tp_richcompare*/
	0,			/*tp_weaklistoffset*/
	0,			/*tp_iter*/
	0,			/*tp_iternext*/
	BZ2Comp_methods,	/*tp_methods*/
	0,			/*tp_members*/
	0,			/*tp_getset*/
	0,			/*tp_base*/
	0,			/*tp_dict*/
	0,			/*tp_descr_get*/
	0,			/*tp_descr_set*/
	0,			/*tp_dictoffset*/
	(initproc)BZ2Comp_init,	/*tp_init*/
	PyType_GenericAlloc,	/*tp_alloc*/
	PyType_GenericNew,	/*tp_new*/
	_PyObject_Del,		/*tp_free*/
	0,			/*tp_is_gc*/
};
1750
1751
1752/* ===================================================================== */
1753/* Members of BZ2Decomp. */
1754
/* OFF(x) is redefined to give field offsets inside BZ2DecompObject. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Struct fields of BZ2Decompressor exposed as Python attributes;
 * unused_data is flagged RO (read-only). */
static PyMemberDef BZ2Decomp_members[] = {
	{"unused_data", T_OBJECT, OFF(unused_data), RO},
	{NULL}	/* Sentinel */
};
1762
1763
1764/* ===================================================================== */
1765/* Methods of BZ2Decomp. */
1766
1767PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1768"decompress(data) -> string\n\
1769\n\
1770Provide more data to the decompressor object. It will return chunks\n\
1771of decompressed data whenever possible. If you try to decompress data\n\
1772after the end of stream is found, EOFError will be raised. If any data\n\
1773was found after the end of stream, it'll be ignored and saved in\n\
1774unused_data attribute.\n\
1775");
1776
1777static PyObject *
1778BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1779{
1780 char *data;
1781 int datasize;
1782 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001783 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001784 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001785 bz_stream *bzs = &self->bzs;
1786 int bzerror;
1787
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001788 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001789 return NULL;
1790
1791 ACQUIRE_LOCK(self);
1792 if (!self->running) {
1793 PyErr_SetString(PyExc_EOFError, "end of stream was "
1794 "already found");
1795 goto error;
1796 }
1797
1798 ret = PyString_FromStringAndSize(NULL, bufsize);
1799 if (!ret)
1800 goto error;
1801
1802 bzs->next_in = data;
1803 bzs->avail_in = datasize;
1804 bzs->next_out = BUF(ret);
1805 bzs->avail_out = bufsize;
1806
1807 totalout = BZS_TOTAL_OUT(bzs);
1808
1809 for (;;) {
1810 Py_BEGIN_ALLOW_THREADS
1811 bzerror = BZ2_bzDecompress(bzs);
1812 Py_END_ALLOW_THREADS
1813 if (bzerror == BZ_STREAM_END) {
1814 if (bzs->avail_in != 0) {
1815 Py_DECREF(self->unused_data);
1816 self->unused_data =
1817 PyString_FromStringAndSize(bzs->next_in,
1818 bzs->avail_in);
1819 }
1820 self->running = 0;
1821 break;
1822 }
1823 if (bzerror != BZ_OK) {
1824 Util_CatchBZ2Error(bzerror);
1825 goto error;
1826 }
1827 if (bzs->avail_out == 0) {
1828 bufsize = Util_NewBufferSize(bufsize);
1829 if (_PyString_Resize(&ret, bufsize) < 0) {
1830 BZ2_bzDecompressEnd(bzs);
1831 goto error;
1832 }
1833 bzs->next_out = BUF(ret);
1834 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1835 - totalout);
1836 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1837 } else if (bzs->avail_in == 0) {
1838 break;
1839 }
1840 }
1841
1842 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001843 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001844
1845 RELEASE_LOCK(self);
1846 return ret;
1847
1848error:
1849 RELEASE_LOCK(self);
1850 Py_XDECREF(ret);
1851 return NULL;
1852}
1853
/* Method table for BZ2Decompressor objects; decompress(data) is the only
 * method. */
static PyMethodDef BZ2Decomp_methods[] = {
	{"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
	{NULL,		NULL}		/* sentinel */
};
1858
1859
1860/* ===================================================================== */
1861/* Slot definitions for BZ2Decomp_Type. */
1862
1863static int
1864BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1865{
1866 int bzerror;
1867
1868 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1869 return -1;
1870
1871#ifdef WITH_THREAD
1872 self->lock = PyThread_allocate_lock();
1873 if (!self->lock)
1874 goto error;
1875#endif
1876
1877 self->unused_data = PyString_FromString("");
1878 if (!self->unused_data)
1879 goto error;
1880
1881 memset(&self->bzs, 0, sizeof(bz_stream));
1882 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1883 if (bzerror != BZ_OK) {
1884 Util_CatchBZ2Error(bzerror);
1885 goto error;
1886 }
1887
1888 self->running = 1;
1889
1890 return 0;
1891
1892error:
1893#ifdef WITH_THREAD
1894 if (self->lock)
1895 PyThread_free_lock(self->lock);
1896#endif
1897 Py_XDECREF(self->unused_data);
1898 return -1;
1899}
1900
1901static void
1902BZ2Decomp_dealloc(BZ2DecompObject *self)
1903{
1904#ifdef WITH_THREAD
1905 if (self->lock)
1906 PyThread_free_lock(self->lock);
1907#endif
1908 Py_XDECREF(self->unused_data);
1909 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001910 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001911}
1912
1913
1914/* ===================================================================== */
1915/* BZ2Decomp_Type definition. */
1916
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for "bz2.BZ2Decompressor".  Slots set to 0 are left unset;
 * the type supplies its own dealloc, method and member tables and init,
 * and uses the generic attribute access, alloc and new functions.
 * Py_TPFLAGS_BASETYPE allows subclassing. */
static PyTypeObject BZ2Decomp_Type = {
	PyObject_HEAD_INIT(NULL)
	0,			/*ob_size*/
	"bz2.BZ2Decompressor",	/*tp_name*/
	sizeof(BZ2DecompObject), /*tp_basicsize*/
	0,			/*tp_itemsize*/
	(destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
	0,			/*tp_print*/
	0,			/*tp_getattr*/
	0,			/*tp_setattr*/
	0,			/*tp_compare*/
	0,			/*tp_repr*/
	0,			/*tp_as_number*/
	0,			/*tp_as_sequence*/
	0,			/*tp_as_mapping*/
	0,			/*tp_hash*/
	0,			/*tp_call*/
	0,			/*tp_str*/
	PyObject_GenericGetAttr,/*tp_getattro*/
	PyObject_GenericSetAttr,/*tp_setattro*/
	0,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
	BZ2Decomp__doc__,	/*tp_doc*/
	0,			/*tp_traverse*/
	0,			/*tp_clear*/
	0,			/*tp_richcompare*/
	0,			/*tp_weaklistoffset*/
	0,			/*tp_iter*/
	0,			/*tp_iternext*/
	BZ2Decomp_methods,	/*tp_methods*/
	BZ2Decomp_members,	/*tp_members*/
	0,			/*tp_getset*/
	0,			/*tp_base*/
	0,			/*tp_dict*/
	0,			/*tp_descr_get*/
	0,			/*tp_descr_set*/
	0,			/*tp_dictoffset*/
	(initproc)BZ2Decomp_init, /*tp_init*/
	PyType_GenericAlloc,	/*tp_alloc*/
	PyType_GenericNew,	/*tp_new*/
	_PyObject_Del,		/*tp_free*/
	0,			/*tp_is_gc*/
};
1968
1969
1970/* ===================================================================== */
1971/* Module functions. */
1972
1973PyDoc_STRVAR(bz2_compress__doc__,
1974"compress(data [, compresslevel=9]) -> string\n\
1975\n\
1976Compress data in one shot. If you want to compress data sequentially,\n\
1977use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1978given, must be a number between 1 and 9.\n\
1979");
1980
1981static PyObject *
1982bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1983{
1984 int compresslevel=9;
1985 char *data;
1986 int datasize;
1987 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001988 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001989 bz_stream _bzs;
1990 bz_stream *bzs = &_bzs;
1991 int bzerror;
1992 static char *kwlist[] = {"data", "compresslevel", 0};
1993
1994 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1995 kwlist, &data, &datasize,
1996 &compresslevel))
1997 return NULL;
1998
1999 if (compresslevel < 1 || compresslevel > 9) {
2000 PyErr_SetString(PyExc_ValueError,
2001 "compresslevel must be between 1 and 9");
2002 return NULL;
2003 }
2004
2005 /* Conforming to bz2 manual, this is large enough to fit compressed
2006 * data in one shot. We will check it later anyway. */
2007 bufsize = datasize + (datasize/100+1) + 600;
2008
2009 ret = PyString_FromStringAndSize(NULL, bufsize);
2010 if (!ret)
2011 return NULL;
2012
2013 memset(bzs, 0, sizeof(bz_stream));
2014
2015 bzs->next_in = data;
2016 bzs->avail_in = datasize;
2017 bzs->next_out = BUF(ret);
2018 bzs->avail_out = bufsize;
2019
2020 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2021 if (bzerror != BZ_OK) {
2022 Util_CatchBZ2Error(bzerror);
2023 Py_DECREF(ret);
2024 return NULL;
2025 }
Tim Peterse3228092002-11-09 04:21:44 +00002026
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002027 for (;;) {
2028 Py_BEGIN_ALLOW_THREADS
2029 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2030 Py_END_ALLOW_THREADS
2031 if (bzerror == BZ_STREAM_END) {
2032 break;
2033 } else if (bzerror != BZ_FINISH_OK) {
2034 BZ2_bzCompressEnd(bzs);
2035 Util_CatchBZ2Error(bzerror);
2036 Py_DECREF(ret);
2037 return NULL;
2038 }
2039 if (bzs->avail_out == 0) {
2040 bufsize = Util_NewBufferSize(bufsize);
2041 if (_PyString_Resize(&ret, bufsize) < 0) {
2042 BZ2_bzCompressEnd(bzs);
2043 Py_DECREF(ret);
2044 return NULL;
2045 }
2046 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2047 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2048 }
2049 }
2050
2051 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002052 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002053 BZ2_bzCompressEnd(bzs);
2054
2055 return ret;
2056}
2057
2058PyDoc_STRVAR(bz2_decompress__doc__,
2059"decompress(data) -> decompressed data\n\
2060\n\
2061Decompress data in one shot. If you want to decompress data sequentially,\n\
2062use an instance of BZ2Decompressor instead.\n\
2063");
2064
2065static PyObject *
2066bz2_decompress(PyObject *self, PyObject *args)
2067{
2068 char *data;
2069 int datasize;
2070 int bufsize = SMALLCHUNK;
2071 PyObject *ret;
2072 bz_stream _bzs;
2073 bz_stream *bzs = &_bzs;
2074 int bzerror;
2075
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002076 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002077 return NULL;
2078
2079 if (datasize == 0)
2080 return PyString_FromString("");
2081
2082 ret = PyString_FromStringAndSize(NULL, bufsize);
2083 if (!ret)
2084 return NULL;
2085
2086 memset(bzs, 0, sizeof(bz_stream));
2087
2088 bzs->next_in = data;
2089 bzs->avail_in = datasize;
2090 bzs->next_out = BUF(ret);
2091 bzs->avail_out = bufsize;
2092
2093 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2094 if (bzerror != BZ_OK) {
2095 Util_CatchBZ2Error(bzerror);
2096 Py_DECREF(ret);
2097 return NULL;
2098 }
Tim Peterse3228092002-11-09 04:21:44 +00002099
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002100 for (;;) {
2101 Py_BEGIN_ALLOW_THREADS
2102 bzerror = BZ2_bzDecompress(bzs);
2103 Py_END_ALLOW_THREADS
2104 if (bzerror == BZ_STREAM_END) {
2105 break;
2106 } else if (bzerror != BZ_OK) {
2107 BZ2_bzDecompressEnd(bzs);
2108 Util_CatchBZ2Error(bzerror);
2109 Py_DECREF(ret);
2110 return NULL;
2111 }
2112 if (bzs->avail_out == 0) {
2113 bufsize = Util_NewBufferSize(bufsize);
2114 if (_PyString_Resize(&ret, bufsize) < 0) {
2115 BZ2_bzDecompressEnd(bzs);
2116 Py_DECREF(ret);
2117 return NULL;
2118 }
2119 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2120 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2121 } else if (bzs->avail_in == 0) {
2122 BZ2_bzDecompressEnd(bzs);
2123 PyErr_SetString(PyExc_ValueError,
2124 "couldn't find end of stream");
2125 Py_DECREF(ret);
2126 return NULL;
2127 }
2128 }
2129
2130 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002131 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002132 BZ2_bzDecompressEnd(bzs);
2133
2134 return ret;
2135}
2136
2137static PyMethodDef bz2_methods[] = {
2138 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2139 bz2_compress__doc__},
2140 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2141 bz2_decompress__doc__},
2142 {NULL, NULL} /* sentinel */
2143};
2144
2145/* ===================================================================== */
2146/* Initialization function. */
2147
2148PyDoc_STRVAR(bz2__doc__,
2149"The python bz2 module provides a comprehensive interface for\n\
2150the bz2 compression library. It implements a complete file\n\
2151interface, one shot (de)compression functions, and types for\n\
2152sequential (de)compression.\n\
2153");
2154
Neal Norwitz21d896c2003-07-01 20:15:21 +00002155PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002156initbz2(void)
2157{
2158 PyObject *m;
2159
2160 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002161 BZ2Comp_Type.ob_type = &PyType_Type;
2162 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002163
2164 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2165
2166 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2167
2168 Py_INCREF(&BZ2File_Type);
2169 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2170
2171 Py_INCREF(&BZ2Comp_Type);
2172 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2173
2174 Py_INCREF(&BZ2Decomp_Type);
2175 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2176}