blob: f358de7e91956e4b11be450344ec414aca9b7040 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
25#define BUF(v) PyString_AS_STRING((PyStringObject *)v)
26
27#define MODE_CLOSED 0
28#define MODE_READ 1
29#define MODE_READ_EOF 2
30#define MODE_WRITE 3
31
32#define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)
33
/* BZS_TOTAL_OUT(bzs): combine the total_out_hi32/total_out_lo32 members of
 * the stream pointed to by bzs into a single count.  The widest integer
 * type the build offers is used; without a 64-bit type only the low word
 * is reported.  The argument is parenthesized so any pointer expression
 * may be passed. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No trailing semicolon here: the macro has to work inside expressions. */
#define BZS_TOTAL_OUT(bzs) \
	((bzs)->total_out_lo32)
#endif
44
45#ifdef WITH_THREAD
46#define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
47#define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
48#else
49#define ACQUIRE_LOCK(obj)
50#define RELEASE_LOCK(obj)
51#endif
52
53#ifdef WITH_UNIVERSAL_NEWLINES
54/* Bits in f_newlinetypes */
55#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
56#define NEWLINE_CR 1 /* \r newline seen */
57#define NEWLINE_LF 2 /* \n newline seen */
58#define NEWLINE_CRLF 4 /* \r\n newline seen */
59#endif
60
61/* ===================================================================== */
62/* Structure definitions. */
63
64typedef struct {
65 PyFileObject file;
66 BZFILE *fp;
67 int mode;
68 long pos;
69 long size;
70#ifdef WITH_THREAD
71 PyThread_type_lock lock;
72#endif
73} BZ2FileObject;
74
75typedef struct {
76 PyObject_HEAD
77 bz_stream bzs;
78 int running;
79#ifdef WITH_THREAD
80 PyThread_type_lock lock;
81#endif
82} BZ2CompObject;
83
84typedef struct {
85 PyObject_HEAD
86 bz_stream bzs;
87 int running;
88 PyObject *unused_data;
89#ifdef WITH_THREAD
90 PyThread_type_lock lock;
91#endif
92} BZ2DecompObject;
93
94/* ===================================================================== */
95/* Utility functions. */
96
97static int
98Util_CatchBZ2Error(int bzerror)
99{
100 int ret = 0;
101 switch(bzerror) {
102 case BZ_OK:
103 case BZ_STREAM_END:
104 break;
105
106 case BZ_CONFIG_ERROR:
107 PyErr_SetString(PyExc_SystemError,
108 "the bz2 library was not compiled "
109 "correctly");
110 ret = 1;
111 break;
Tim Peterse3228092002-11-09 04:21:44 +0000112
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000113 case BZ_PARAM_ERROR:
114 PyErr_SetString(PyExc_ValueError,
115 "the bz2 library has received wrong "
116 "parameters");
117 ret = 1;
118 break;
Tim Peterse3228092002-11-09 04:21:44 +0000119
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000120 case BZ_MEM_ERROR:
121 PyErr_NoMemory();
122 ret = 1;
123 break;
124
125 case BZ_DATA_ERROR:
126 case BZ_DATA_ERROR_MAGIC:
127 PyErr_SetString(PyExc_IOError, "invalid data stream");
128 ret = 1;
129 break;
130
131 case BZ_IO_ERROR:
132 PyErr_SetString(PyExc_IOError, "unknown IO error");
133 ret = 1;
134 break;
135
136 case BZ_UNEXPECTED_EOF:
137 PyErr_SetString(PyExc_EOFError,
138 "compressed file ended before the "
139 "logical end-of-stream was detected");
140 ret = 1;
141 break;
142
143 case BZ_SEQUENCE_ERROR:
144 PyErr_SetString(PyExc_RuntimeError,
145 "wrong sequence of bz2 library "
146 "commands used");
147 ret = 1;
148 break;
149 }
150 return ret;
151}
152
/* SMALLCHUNK is BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK is 512K when int has at least 4 bytes, 16K otherwise. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	size_t growth;

	if (currentsize <= SMALLCHUNK)
		growth = SMALLCHUNK;
	else if (currentsize <= BIGCHUNK)
		growth = currentsize;
	else
		growth = BIGCHUNK;

	return currentsize + growth;
}
179
180/* This is a hacked version of Python's fileobject.c:get_line(). */
181static PyObject *
182Util_GetLine(BZ2FileObject *self, int n)
183{
184 char c;
185 char *buf, *end;
186 size_t total_v_size; /* total # of slots in buffer */
187 size_t used_v_size; /* # used slots in buffer */
188 size_t increment; /* amount to increment the buffer */
189 PyObject *v;
190 int bzerror;
191#ifdef WITH_UNIVERSAL_NEWLINES
192 int newlinetypes = ((PyFileObject*)self)->f_newlinetypes;
193 int skipnextlf = ((PyFileObject*)self)->f_skipnextlf;
194 int univ_newline = ((PyFileObject*)self)->f_univ_newline;
195#endif
196
197 total_v_size = n > 0 ? n : 100;
198 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
199 if (v == NULL)
200 return NULL;
201
202 buf = BUF(v);
203 end = buf + total_v_size;
204
205 for (;;) {
206 Py_BEGIN_ALLOW_THREADS
207#ifdef WITH_UNIVERSAL_NEWLINES
208 if (univ_newline) {
209 while (1) {
210 BZ2_bzRead(&bzerror, self->fp, &c, 1);
211 self->pos++;
212 if (bzerror != BZ_OK || buf == end)
213 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000214 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000215 skipnextlf = 0;
216 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000217 /* Seeing a \n here with
218 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000219 * saw a \r before.
220 */
221 newlinetypes |= NEWLINE_CRLF;
222 BZ2_bzRead(&bzerror, self->fp,
223 &c, 1);
224 if (bzerror != BZ_OK)
225 break;
226 } else {
227 newlinetypes |= NEWLINE_CR;
228 }
229 }
230 if (c == '\r') {
231 skipnextlf = 1;
232 c = '\n';
233 } else if ( c == '\n')
234 newlinetypes |= NEWLINE_LF;
235 *buf++ = c;
236 if (c == '\n') break;
237 }
238 if (bzerror == BZ_STREAM_END && skipnextlf)
239 newlinetypes |= NEWLINE_CR;
240 } else /* If not universal newlines use the normal loop */
241#endif
242 do {
243 BZ2_bzRead(&bzerror, self->fp, &c, 1);
244 self->pos++;
245 *buf++ = c;
246 } while (bzerror == BZ_OK && c != '\n' && buf != end);
247 Py_END_ALLOW_THREADS
248#ifdef WITH_UNIVERSAL_NEWLINES
249 ((PyFileObject*)self)->f_newlinetypes = newlinetypes;
250 ((PyFileObject*)self)->f_skipnextlf = skipnextlf;
251#endif
252 if (bzerror == BZ_STREAM_END) {
253 self->size = self->pos;
254 self->mode = MODE_READ_EOF;
255 break;
256 } else if (bzerror != BZ_OK) {
257 Util_CatchBZ2Error(bzerror);
258 Py_DECREF(v);
259 return NULL;
260 }
261 if (c == '\n')
262 break;
263 /* Must be because buf == end */
264 if (n > 0)
265 break;
266 used_v_size = total_v_size;
267 increment = total_v_size >> 2; /* mild exponential growth */
268 total_v_size += increment;
269 if (total_v_size > INT_MAX) {
270 PyErr_SetString(PyExc_OverflowError,
271 "line is longer than a Python string can hold");
272 Py_DECREF(v);
273 return NULL;
274 }
275 if (_PyString_Resize(&v, total_v_size) < 0)
276 return NULL;
277 buf = BUF(v) + used_v_size;
278 end = BUF(v) + total_v_size;
279 }
280
281 used_v_size = buf - BUF(v);
282 if (used_v_size != total_v_size)
283 _PyString_Resize(&v, used_v_size);
284 return v;
285}
286
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* This is a hacked version of Python's
 * fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n translated bytes from stream into buf and return how many
 * were stored.  When the file is not in universal-newline mode this is a
 * plain BZ2_bzRead.  Otherwise "\r" and "\r\n" are both stored as "\n",
 * and the newline kinds seen are recorded in the file object's
 * f_newlinetypes/f_skipnextlf fields.
 *
 * *bzerror always holds a defined status on return, including for n == 0.
 * Reading stops as soon as libbz2 reports anything other than BZ_OK, so a
 * BZ_STREAM_END status is never replaced by the result of a further read
 * on the finished stream. */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
		     char* buf, size_t n, BZ2FileObject *fobj)
{
	char *dst = buf;
	PyFileObject *f = (PyFileObject *)fobj;
	int newlinetypes, skipnextlf;

	assert(buf != NULL);
	assert(stream != NULL);

	if (!f->f_univ_newline)
		return BZ2_bzRead(bzerror, stream, buf, n);

	/* With n == 0 the loop below never calls BZ2_bzRead, so give the
	 * caller a defined status to test. */
	*bzerror = BZ_OK;

	newlinetypes = f->f_newlinetypes;
	skipnextlf = f->f_skipnextlf;

	/* Invariant:  n is the number of bytes remaining to be filled
	 * in the buffer.
	 */
	while (n) {
		size_t nread;
		int shortread;
		char *src = dst;

		nread = BZ2_bzRead(bzerror, stream, dst, n);
		assert(nread <= n);
		n -= nread; /* assuming 1 byte out for each in; will adjust */
		shortread = n != 0;	/* true iff EOF or error */
		while (nread--) {
			char c = *src++;
			if (c == '\r') {
				/* Save as LF and set flag to skip next LF. */
				*dst++ = '\n';
				skipnextlf = 1;
			}
			else if (skipnextlf && c == '\n') {
				/* Skip LF, and remember we saw CR LF. */
				skipnextlf = 0;
				newlinetypes |= NEWLINE_CRLF;
				++n;
			}
			else {
				/* Normal char to be stored in buffer.  Also
				 * update the newlinetypes flag if either this
				 * is an LF or the previous char was a CR.
				 */
				if (c == '\n')
					newlinetypes |= NEWLINE_LF;
				else if (skipnextlf)
					newlinetypes |= NEWLINE_CR;
				*dst++ = c;
				skipnextlf = 0;
			}
		}
		/* A skipped LF bumps n back up, which would otherwise send
		 * us around again even though the stream has ended. */
		if (shortread || *bzerror != BZ_OK) {
			/* If this is EOF, update type flags. */
			if (skipnextlf && *bzerror == BZ_STREAM_END)
				newlinetypes |= NEWLINE_CR;
			break;
		}
	}
	f->f_newlinetypes = newlinetypes;
	f->f_skipnextlf = skipnextlf;
	return dst - buf;
}
#endif
359
360/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
361static void
362Util_DropReadAhead(BZ2FileObject *self)
363{
364 PyFileObject *f = (PyFileObject*)self;
365 if (f->f_buf != NULL) {
366 PyMem_Free(f->f_buf);
367 f->f_buf = NULL;
368 }
369}
370
371/* This is a hacked version of Python's fileobject.c:readahead(). */
372static int
373Util_ReadAhead(BZ2FileObject *self, int bufsize)
374{
375 int chunksize;
376 int bzerror;
377 PyFileObject *f = (PyFileObject*)self;
378
379 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000380 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000381 return 0;
382 else
383 Util_DropReadAhead(self);
384 }
385 if (self->mode == MODE_READ_EOF) {
386 return -1;
387 }
388 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
389 return -1;
390 }
391 Py_BEGIN_ALLOW_THREADS
392 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, f->f_buf,
393 bufsize, self);
394 Py_END_ALLOW_THREADS
395 self->pos += chunksize;
396 if (bzerror == BZ_STREAM_END) {
397 self->size = self->pos;
398 self->mode = MODE_READ_EOF;
399 } else if (bzerror != BZ_OK) {
400 Util_CatchBZ2Error(bzerror);
401 Util_DropReadAhead(self);
402 return -1;
403 }
404 f->f_bufptr = f->f_buf;
405 f->f_bufend = f->f_buf + chunksize;
406 return 0;
407}
408
409/* This is a hacked version of Python's
410 * fileobject.c:readahead_get_line_skip(). */
411static PyStringObject *
412Util_ReadAheadGetLineSkip(BZ2FileObject *bf, int skip, int bufsize)
413{
414 PyFileObject *f = (PyFileObject*)bf;
415 PyStringObject* s;
416 char *bufptr;
417 char *buf;
418 int len;
419
420 if (f->f_buf == NULL)
Tim Peterse3228092002-11-09 04:21:44 +0000421 if (Util_ReadAhead(bf, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000422 return NULL;
423
424 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000425 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000426 return (PyStringObject *)
427 PyString_FromStringAndSize(NULL, skip);
428 bufptr = memchr(f->f_bufptr, '\n', len);
429 if (bufptr != NULL) {
430 bufptr++; /* Count the '\n' */
431 len = bufptr - f->f_bufptr;
432 s = (PyStringObject *)
433 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000434 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 return NULL;
436 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
437 f->f_bufptr = bufptr;
438 if (bufptr == f->f_bufend)
439 Util_DropReadAhead(bf);
440 } else {
441 bufptr = f->f_bufptr;
442 buf = f->f_buf;
443 f->f_buf = NULL; /* Force new readahead buffer */
444 s = Util_ReadAheadGetLineSkip(
445 bf, skip+len, bufsize + (bufsize>>2) );
446 if (s == NULL) {
447 PyMem_Free(buf);
448 return NULL;
449 }
450 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
451 PyMem_Free(buf);
452 }
453 return s;
454}
455
456/* ===================================================================== */
457/* Methods of BZ2File. */
458
459PyDoc_STRVAR(BZ2File_read__doc__,
460"read([size]) -> string\n\
461\n\
462Read at most size uncompressed bytes, returned as a string. If the size\n\
463argument is negative or omitted, read until EOF is reached.\n\
464");
465
466/* This is a hacked version of Python's fileobject.c:file_read(). */
467static PyObject *
468BZ2File_read(BZ2FileObject *self, PyObject *args)
469{
470 long bytesrequested = -1;
471 size_t bytesread, buffersize, chunksize;
472 int bzerror;
473 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000474
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
476 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000477
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000478 ACQUIRE_LOCK(self);
479 switch (self->mode) {
480 case MODE_READ:
481 break;
482 case MODE_READ_EOF:
483 ret = PyString_FromString("");
484 goto cleanup;
485 case MODE_CLOSED:
486 PyErr_SetString(PyExc_ValueError,
487 "I/O operation on closed file");
488 goto cleanup;
489 default:
490 PyErr_SetString(PyExc_IOError,
491 "file is not ready for reading");
492 goto cleanup;
493 }
494
495 if (bytesrequested < 0)
496 buffersize = Util_NewBufferSize((size_t)0);
497 else
498 buffersize = bytesrequested;
499 if (buffersize > INT_MAX) {
500 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000501 "requested number of bytes is "
502 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000503 goto cleanup;
504 }
505 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
506 if (ret == NULL)
507 goto cleanup;
508 bytesread = 0;
509
510 for (;;) {
511 Py_BEGIN_ALLOW_THREADS
512 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
513 BUF(ret)+bytesread,
514 buffersize-bytesread,
515 self);
516 self->pos += chunksize;
517 Py_END_ALLOW_THREADS
518 bytesread += chunksize;
519 if (bzerror == BZ_STREAM_END) {
520 self->size = self->pos;
521 self->mode = MODE_READ_EOF;
522 break;
523 } else if (bzerror != BZ_OK) {
524 Util_CatchBZ2Error(bzerror);
525 Py_DECREF(ret);
526 ret = NULL;
527 goto cleanup;
528 }
529 if (bytesrequested < 0) {
530 buffersize = Util_NewBufferSize(buffersize);
531 if (_PyString_Resize(&ret, buffersize) < 0)
532 goto cleanup;
533 } else {
534 break;
535 }
536 }
537 if (bytesread != buffersize)
538 _PyString_Resize(&ret, bytesread);
539
540cleanup:
541 RELEASE_LOCK(self);
542 return ret;
543}
544
545PyDoc_STRVAR(BZ2File_readline__doc__,
546"readline([size]) -> string\n\
547\n\
548Return the next line from the file, as a string, retaining newline.\n\
549A non-negative size argument will limit the maximum number of bytes to\n\
550return (an incomplete line may be returned then). Return an empty\n\
551string at EOF.\n\
552");
553
554static PyObject *
555BZ2File_readline(BZ2FileObject *self, PyObject *args)
556{
557 PyObject *ret = NULL;
558 int sizehint = -1;
559
560 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
561 return NULL;
562
563 ACQUIRE_LOCK(self);
564 switch (self->mode) {
565 case MODE_READ:
566 break;
567 case MODE_READ_EOF:
568 ret = PyString_FromString("");
569 goto cleanup;
570 case MODE_CLOSED:
571 PyErr_SetString(PyExc_ValueError,
572 "I/O operation on closed file");
573 goto cleanup;
574 default:
575 PyErr_SetString(PyExc_IOError,
576 "file is not ready for reading");
577 goto cleanup;
578 }
579
580 if (sizehint == 0)
581 ret = PyString_FromString("");
582 else
583 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
584
585cleanup:
586 RELEASE_LOCK(self);
587 return ret;
588}
589
590PyDoc_STRVAR(BZ2File_readlines__doc__,
591"readlines([size]) -> list\n\
592\n\
593Call readline() repeatedly and return a list of lines read.\n\
594The optional size argument, if given, is an approximate bound on the\n\
595total number of bytes in the lines returned.\n\
596");
597
598/* This is a hacked version of Python's fileobject.c:file_readlines(). */
599static PyObject *
600BZ2File_readlines(BZ2FileObject *self, PyObject *args)
601{
602 long sizehint = 0;
603 PyObject *list = NULL;
604 PyObject *line;
605 char small_buffer[SMALLCHUNK];
606 char *buffer = small_buffer;
607 size_t buffersize = SMALLCHUNK;
608 PyObject *big_buffer = NULL;
609 size_t nfilled = 0;
610 size_t nread;
611 size_t totalread = 0;
612 char *p, *q, *end;
613 int err;
614 int shortread = 0;
615 int bzerror;
616
617 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
618 return NULL;
619
620 ACQUIRE_LOCK(self);
621 switch (self->mode) {
622 case MODE_READ:
623 break;
624 case MODE_READ_EOF:
625 list = PyList_New(0);
626 goto cleanup;
627 case MODE_CLOSED:
628 PyErr_SetString(PyExc_ValueError,
629 "I/O operation on closed file");
630 goto cleanup;
631 default:
632 PyErr_SetString(PyExc_IOError,
633 "file is not ready for reading");
634 goto cleanup;
635 }
636
637 if ((list = PyList_New(0)) == NULL)
638 goto cleanup;
639
640 for (;;) {
641 Py_BEGIN_ALLOW_THREADS
642 nread = Util_UnivNewlineRead(&bzerror, self->fp,
643 buffer+nfilled,
644 buffersize-nfilled, self);
645 self->pos += nread;
646 Py_END_ALLOW_THREADS
647 if (bzerror == BZ_STREAM_END) {
648 self->size = self->pos;
649 self->mode = MODE_READ_EOF;
650 if (nread == 0) {
651 sizehint = 0;
652 break;
653 }
654 shortread = 1;
655 } else if (bzerror != BZ_OK) {
656 Util_CatchBZ2Error(bzerror);
657 error:
658 Py_DECREF(list);
659 list = NULL;
660 goto cleanup;
661 }
662 totalread += nread;
663 p = memchr(buffer+nfilled, '\n', nread);
664 if (p == NULL) {
665 /* Need a larger buffer to fit this line */
666 nfilled += nread;
667 buffersize *= 2;
668 if (buffersize > INT_MAX) {
669 PyErr_SetString(PyExc_OverflowError,
670 "line is longer than a Python string can hold");
671 goto error;
672 }
673 if (big_buffer == NULL) {
674 /* Create the big buffer */
675 big_buffer = PyString_FromStringAndSize(
676 NULL, buffersize);
677 if (big_buffer == NULL)
678 goto error;
679 buffer = PyString_AS_STRING(big_buffer);
680 memcpy(buffer, small_buffer, nfilled);
681 }
682 else {
683 /* Grow the big buffer */
684 _PyString_Resize(&big_buffer, buffersize);
685 buffer = PyString_AS_STRING(big_buffer);
686 }
687 continue;
688 }
689 end = buffer+nfilled+nread;
690 q = buffer;
691 do {
692 /* Process complete lines */
693 p++;
694 line = PyString_FromStringAndSize(q, p-q);
695 if (line == NULL)
696 goto error;
697 err = PyList_Append(list, line);
698 Py_DECREF(line);
699 if (err != 0)
700 goto error;
701 q = p;
702 p = memchr(q, '\n', end-q);
703 } while (p != NULL);
704 /* Move the remaining incomplete line to the start */
705 nfilled = end-q;
706 memmove(buffer, q, nfilled);
707 if (sizehint > 0)
708 if (totalread >= (size_t)sizehint)
709 break;
710 if (shortread) {
711 sizehint = 0;
712 break;
713 }
714 }
715 if (nfilled != 0) {
716 /* Partial last line */
717 line = PyString_FromStringAndSize(buffer, nfilled);
718 if (line == NULL)
719 goto error;
720 if (sizehint > 0) {
721 /* Need to complete the last line */
722 PyObject *rest = Util_GetLine(self, 0);
723 if (rest == NULL) {
724 Py_DECREF(line);
725 goto error;
726 }
727 PyString_Concat(&line, rest);
728 Py_DECREF(rest);
729 if (line == NULL)
730 goto error;
731 }
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 }
737
738 cleanup:
739 RELEASE_LOCK(self);
740 if (big_buffer) {
741 Py_DECREF(big_buffer);
742 }
743 return list;
744}
745
746PyDoc_STRVAR(BZ2File_write__doc__,
747"write(data) -> None\n\
748\n\
749Write the 'data' string to file. Note that due to buffering, close() may\n\
750be needed before the file on disk reflects the data written.\n\
751");
752
753/* This is a hacked version of Python's fileobject.c:file_write(). */
754static PyObject *
755BZ2File_write(BZ2FileObject *self, PyObject *args)
756{
757 PyObject *ret = NULL;
758 char *buf;
759 int len;
760 int bzerror;
761
762 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
763 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000764
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000765 ACQUIRE_LOCK(self);
766 switch (self->mode) {
767 case MODE_WRITE:
768 break;
Tim Peterse3228092002-11-09 04:21:44 +0000769
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000770 case MODE_CLOSED:
771 PyErr_SetString(PyExc_ValueError,
772 "I/O operation on closed file");
773 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000774
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000775 default:
776 PyErr_SetString(PyExc_IOError,
777 "file is not ready for writing");
778 goto cleanup;;
779 }
780
781 PyFile_SoftSpace((PyObject*)self, 0);
782
783 Py_BEGIN_ALLOW_THREADS
784 BZ2_bzWrite (&bzerror, self->fp, buf, len);
785 self->pos += len;
786 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788 if (bzerror != BZ_OK) {
789 Util_CatchBZ2Error(bzerror);
790 goto cleanup;
791 }
Tim Peterse3228092002-11-09 04:21:44 +0000792
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000793 Py_INCREF(Py_None);
794 ret = Py_None;
795
796cleanup:
797 RELEASE_LOCK(self);
798 return ret;
799}
800
801PyDoc_STRVAR(BZ2File_writelines__doc__,
802"writelines(sequence_of_strings) -> None\n\
803\n\
804Write the sequence of strings to the file. Note that newlines are not\n\
805added. The sequence can be any iterable object producing strings. This is\n\
806equivalent to calling write() for each string.\n\
807");
808
809/* This is a hacked version of Python's fileobject.c:file_writelines(). */
810static PyObject *
811BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
812{
813#define CHUNKSIZE 1000
814 PyObject *list = NULL;
815 PyObject *iter = NULL;
816 PyObject *ret = NULL;
817 PyObject *line;
818 int i, j, index, len, islist;
819 int bzerror;
820
821 ACQUIRE_LOCK(self);
822 islist = PyList_Check(seq);
823 if (!islist) {
824 iter = PyObject_GetIter(seq);
825 if (iter == NULL) {
826 PyErr_SetString(PyExc_TypeError,
827 "writelines() requires an iterable argument");
828 goto error;
829 }
830 list = PyList_New(CHUNKSIZE);
831 if (list == NULL)
832 goto error;
833 }
834
835 /* Strategy: slurp CHUNKSIZE lines into a private list,
836 checking that they are all strings, then write that list
837 without holding the interpreter lock, then come back for more. */
838 for (index = 0; ; index += CHUNKSIZE) {
839 if (islist) {
840 Py_XDECREF(list);
841 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
842 if (list == NULL)
843 goto error;
844 j = PyList_GET_SIZE(list);
845 }
846 else {
847 for (j = 0; j < CHUNKSIZE; j++) {
848 line = PyIter_Next(iter);
849 if (line == NULL) {
850 if (PyErr_Occurred())
851 goto error;
852 break;
853 }
854 PyList_SetItem(list, j, line);
855 }
856 }
857 if (j == 0)
858 break;
859
860 /* Check that all entries are indeed strings. If not,
861 apply the same rules as for file.write() and
862 convert the rets to strings. This is slow, but
863 seems to be the only way since all conversion APIs
864 could potentially execute Python code. */
865 for (i = 0; i < j; i++) {
866 PyObject *v = PyList_GET_ITEM(list, i);
867 if (!PyString_Check(v)) {
868 const char *buffer;
869 int len;
870 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
871 PyErr_SetString(PyExc_TypeError,
872 "writelines() "
873 "argument must be "
874 "a sequence of "
875 "strings");
876 goto error;
877 }
878 line = PyString_FromStringAndSize(buffer,
879 len);
880 if (line == NULL)
881 goto error;
882 Py_DECREF(v);
883 PyList_SET_ITEM(list, i, line);
884 }
885 }
886
887 PyFile_SoftSpace((PyObject*)self, 0);
888
889 /* Since we are releasing the global lock, the
890 following code may *not* execute Python code. */
891 Py_BEGIN_ALLOW_THREADS
892 for (i = 0; i < j; i++) {
893 line = PyList_GET_ITEM(list, i);
894 len = PyString_GET_SIZE(line);
895 BZ2_bzWrite (&bzerror, self->fp,
896 PyString_AS_STRING(line), len);
897 if (bzerror != BZ_OK) {
898 Py_BLOCK_THREADS
899 Util_CatchBZ2Error(bzerror);
900 goto error;
901 }
902 }
903 Py_END_ALLOW_THREADS
904
905 if (j < CHUNKSIZE)
906 break;
907 }
908
909 Py_INCREF(Py_None);
910 ret = Py_None;
911
912 error:
913 RELEASE_LOCK(self);
914 Py_XDECREF(list);
915 Py_XDECREF(iter);
916 return ret;
917#undef CHUNKSIZE
918}
919
920PyDoc_STRVAR(BZ2File_seek__doc__,
921"seek(offset [, whence]) -> None\n\
922\n\
923Move to new file position. Argument offset is a byte count. Optional\n\
924argument whence defaults to 0 (offset from start of file, offset\n\
925should be >= 0); other values are 1 (move relative to current position,\n\
926positive or negative), and 2 (move relative to end of file, usually\n\
927negative, although many platforms allow seeking beyond the end of a file).\n\
928\n\
929Note that seeking of bz2 files is emulated, and depending on the parameters\n\
930the operation may be extremely slow.\n\
931");
932
933static PyObject *
934BZ2File_seek(BZ2FileObject *self, PyObject *args)
935{
936 int where = 0;
937 long offset;
938 char small_buffer[SMALLCHUNK];
939 char *buffer = small_buffer;
940 size_t buffersize = SMALLCHUNK;
941 int bytesread = 0;
942 int readsize;
943 int chunksize;
944 int bzerror;
945 int rewind = 0;
946 PyObject *func;
947 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000948
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
950 return NULL;
951
952 ACQUIRE_LOCK(self);
953 Util_DropReadAhead(self);
954 switch (self->mode) {
955 case MODE_READ:
956 case MODE_READ_EOF:
957 break;
Tim Peterse3228092002-11-09 04:21:44 +0000958
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000959 case MODE_CLOSED:
960 PyErr_SetString(PyExc_ValueError,
961 "I/O operation on closed file");
962 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000963
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000964 default:
965 PyErr_SetString(PyExc_IOError,
966 "seek works only while reading");
967 goto cleanup;;
968 }
969
970 if (offset < 0) {
971 if (where == 1) {
972 offset = self->pos + offset;
973 rewind = 1;
974 } else if (where == 2) {
975 if (self->size == -1) {
976 assert(self->mode != MODE_READ_EOF);
977 for (;;) {
978 Py_BEGIN_ALLOW_THREADS
979 chunksize = Util_UnivNewlineRead(
980 &bzerror, self->fp,
981 buffer, buffersize,
982 self);
983 self->pos += chunksize;
984 Py_END_ALLOW_THREADS
985
986 bytesread += chunksize;
987 if (bzerror == BZ_STREAM_END) {
988 break;
989 } else if (bzerror != BZ_OK) {
990 Util_CatchBZ2Error(bzerror);
991 goto cleanup;
992 }
993 }
994 self->mode = MODE_READ_EOF;
995 self->size = self->pos;
996 bytesread = 0;
997 }
998 offset = self->size + offset;
999 if (offset >= self->pos)
1000 offset -= self->pos;
1001 else
1002 rewind = 1;
1003 }
1004 if (offset < 0)
1005 offset = 0;
1006 } else if (where == 0) {
1007 if (offset >= self->pos)
1008 offset -= self->pos;
1009 else
1010 rewind = 1;
1011 }
1012
1013 if (rewind) {
1014 BZ2_bzReadClose(&bzerror, self->fp);
1015 func = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1016 "seek");
1017 if (bzerror != BZ_OK) {
1018 Util_CatchBZ2Error(bzerror);
1019 goto cleanup;
1020 }
1021 if (!func) {
1022 PyErr_SetString(PyExc_RuntimeError,
1023 "can't find file.seek method");
1024 goto cleanup;
1025 }
1026 ret = PyObject_CallFunction(func, "(i)", 0);
1027 if (!ret)
1028 goto cleanup;
1029 Py_DECREF(ret);
1030 ret = NULL;
1031 self->pos = 0;
1032 self->fp = BZ2_bzReadOpen(&bzerror,
1033 PyFile_AsFile((PyObject*)self),
1034 0, 0, NULL, 0);
1035 if (bzerror != BZ_OK) {
1036 Util_CatchBZ2Error(bzerror);
1037 goto cleanup;
1038 }
1039 self->mode = MODE_READ;
1040 } else if (self->mode == MODE_READ_EOF) {
1041 goto exit;
1042 }
1043
1044 if (offset == 0)
1045 goto exit;
1046
1047 /* Before getting here, offset must be set to the number of bytes
1048 * to walk forward. */
1049 for (;;) {
Tim Petersa17c0c42002-11-09 04:23:31 +00001050 if ((size_t)offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001051 readsize = buffersize;
1052 else
1053 readsize = offset-bytesread;
1054 Py_BEGIN_ALLOW_THREADS
1055 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1056 buffer, readsize, self);
1057 self->pos += chunksize;
1058 Py_END_ALLOW_THREADS
1059 bytesread += chunksize;
1060 if (bzerror == BZ_STREAM_END) {
1061 self->size = self->pos;
1062 self->mode = MODE_READ_EOF;
1063 break;
1064 } else if (bzerror != BZ_OK) {
1065 Util_CatchBZ2Error(bzerror);
1066 goto cleanup;
1067 }
1068 if (bytesread == offset)
1069 break;
1070 }
1071
1072exit:
1073 Py_INCREF(Py_None);
1074 ret = Py_None;
1075
1076cleanup:
1077 RELEASE_LOCK(self);
1078 return ret;
1079}
1080
1081PyDoc_STRVAR(BZ2File_tell__doc__,
1082"tell() -> int\n\
1083\n\
1084Return the current file position, an integer (may be a long integer).\n\
1085");
1086
1087static PyObject *
1088BZ2File_tell(BZ2FileObject *self, PyObject *args)
1089{
1090 PyObject *ret = NULL;
1091
1092 if (self->mode == MODE_CLOSED) {
1093 PyErr_SetString(PyExc_ValueError,
1094 "I/O operation on closed file");
1095 goto cleanup;
1096 }
1097
1098 ret = PyInt_FromLong(self->pos);
1099
1100cleanup:
1101 return ret;
1102}
1103
1104PyDoc_STRVAR(BZ2File_notsup__doc__,
1105"Operation not supported.\n\
1106");
1107
1108static PyObject *
1109BZ2File_notsup(BZ2FileObject *self, PyObject *args)
1110{
1111 PyErr_SetString(PyExc_IOError, "operation not supported");
1112 return NULL;
1113}
1114
1115PyDoc_STRVAR(BZ2File_close__doc__,
1116"close() -> None or (perhaps) an integer\n\
1117\n\
1118Close the file. Sets data attribute .closed to true. A closed file\n\
1119cannot be used for further I/O operations. close() may be called more\n\
1120than once without error.\n\
1121");
1122
1123static PyObject *
1124BZ2File_close(BZ2FileObject *self)
1125{
1126 PyObject *file_close;
1127 PyObject *ret = NULL;
1128 int bzerror = BZ_OK;
1129
1130 ACQUIRE_LOCK(self);
1131 switch (self->mode) {
1132 case MODE_READ:
1133 case MODE_READ_EOF:
1134 BZ2_bzReadClose(&bzerror, self->fp);
1135 break;
1136 case MODE_WRITE:
1137 BZ2_bzWriteClose(&bzerror, self->fp,
1138 0, NULL, NULL);
1139 break;
1140 }
1141 self->mode = MODE_CLOSED;
1142 file_close = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1143 "close");
1144 if (!file_close) {
1145 PyErr_SetString(PyExc_RuntimeError,
1146 "can't find file.close method");
1147 goto cleanup;
1148 }
1149 ret = PyObject_CallObject(file_close, NULL);
1150 if (bzerror != BZ_OK) {
1151 Util_CatchBZ2Error(bzerror);
1152 Py_XDECREF(ret);
1153 ret = NULL;
1154 goto cleanup;
1155 }
1156
1157cleanup:
1158 RELEASE_LOCK(self);
1159 return ret;
1160}
1161
1162static PyMethodDef BZ2File_methods[] = {
1163 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1164 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1165 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1166 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1167 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1168 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1169 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1170 {"truncate", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1171 {"readinto", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1172 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1173 {NULL, NULL} /* sentinel */
1174};
1175
1176
1177/* ===================================================================== */
1178/* Slot definitions for BZ2File_Type. */
1179
1180static int
1181BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1182{
1183 PyObject *file_args = NULL;
1184 static char *kwlist[] = {"filename", "mode", "buffering",
1185 "compresslevel", 0};
1186 char *name = NULL;
1187 char *mode = "r";
1188 int buffering = -1;
1189 int compresslevel = 9;
1190 int bzerror;
1191 int mode_char = 0;
1192 int univ_newline = 0;
1193
1194 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001195
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001196 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "et|sii:BZ2File",
1197 kwlist, Py_FileSystemDefaultEncoding,
1198 &name, &mode, &buffering,
1199 &compresslevel))
1200 return -1;
1201
1202 if (compresslevel < 1 || compresslevel > 9) {
1203 PyErr_SetString(PyExc_ValueError,
1204 "compresslevel must be between 1 and 9");
1205 return -1;
1206 }
1207
1208 for (;;) {
1209 int error = 0;
1210 switch (*mode) {
1211 case 'r':
1212 case 'w':
1213 if (mode_char)
1214 error = 1;
1215 mode_char = *mode;
1216 break;
1217
1218 case 'b':
1219 break;
1220
1221 case 'U':
1222 univ_newline = 1;
1223 break;
1224
1225 default:
1226 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001227 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001228 }
1229 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001230 PyErr_Format(PyExc_ValueError,
1231 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001232 return -1;
1233 }
1234 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001235 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001236 break;
1237 }
1238
1239 if (mode_char == 'r')
1240 mode = univ_newline ? "rbU" : "rb";
1241 else
1242 mode = univ_newline ? "wbU" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001243
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001244 file_args = Py_BuildValue("(ssi)", name, mode, buffering);
1245 if (!file_args)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001246 return -1;
1247
1248 /* From now on, we have stuff to dealloc, so jump to error label
1249 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001250
1251 if (PyFile_Type.tp_init((PyObject *)self, file_args, NULL) < 0)
1252 goto error;
1253
1254#ifdef WITH_THREAD
1255 self->lock = PyThread_allocate_lock();
1256 if (!self->lock)
1257 goto error;
1258#endif
1259
1260 if (mode_char == 'r')
1261 self->fp = BZ2_bzReadOpen(&bzerror,
1262 PyFile_AsFile((PyObject*)self),
1263 0, 0, NULL, 0);
1264 else
1265 self->fp = BZ2_bzWriteOpen(&bzerror,
1266 PyFile_AsFile((PyObject*)self),
1267 compresslevel, 0, 0);
1268
1269 if (bzerror != BZ_OK) {
1270 Util_CatchBZ2Error(bzerror);
1271 goto error;
1272 }
1273
1274 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1275
1276 Py_XDECREF(file_args);
1277 PyMem_Free(name);
1278 return 0;
1279
1280error:
1281#ifdef WITH_THREAD
1282 if (self->lock)
1283 PyThread_free_lock(self->lock);
1284#endif
1285 Py_XDECREF(file_args);
1286 PyMem_Free(name);
1287 return -1;
1288}
1289
1290static void
1291BZ2File_dealloc(BZ2FileObject *self)
1292{
1293 int bzerror;
1294#ifdef WITH_THREAD
1295 if (self->lock)
1296 PyThread_free_lock(self->lock);
1297#endif
1298 switch (self->mode) {
1299 case MODE_READ:
1300 case MODE_READ_EOF:
1301 BZ2_bzReadClose(&bzerror, self->fp);
1302 break;
1303 case MODE_WRITE:
1304 BZ2_bzWriteClose(&bzerror, self->fp,
1305 0, NULL, NULL);
1306 break;
1307 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001308 Util_DropReadAhead(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001309 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1310}
1311
1312/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1313static PyObject *
1314BZ2File_getiter(BZ2FileObject *self)
1315{
1316 if (self->mode == MODE_CLOSED) {
1317 PyErr_SetString(PyExc_ValueError,
1318 "I/O operation on closed file");
1319 return NULL;
1320 }
1321 Py_INCREF((PyObject*)self);
1322 return (PyObject *)self;
1323}
1324
1325/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1326#define READAHEAD_BUFSIZE 8192
1327static PyObject *
1328BZ2File_iternext(BZ2FileObject *self)
1329{
1330 PyStringObject* ret;
1331 ACQUIRE_LOCK(self);
1332 if (self->mode == MODE_CLOSED) {
1333 PyErr_SetString(PyExc_ValueError,
1334 "I/O operation on closed file");
1335 return NULL;
1336 }
1337 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1338 RELEASE_LOCK(self);
1339 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1340 Py_XDECREF(ret);
1341 return NULL;
1342 }
1343 return (PyObject *)ret;
1344}
1345
1346/* ===================================================================== */
1347/* BZ2File_Type definition. */
1348
1349PyDoc_VAR(BZ2File__doc__) =
1350PyDoc_STR(
1351"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1352\n\
1353Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1354writing. When opened for writing, the file will be created if it doesn't\n\
1355exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1356unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1357is given, must be a number between 1 and 9.\n\
1358")
1359#ifdef WITH_UNIVERSAL_NEWLINES
1360PyDoc_STR(
1361"\n\
1362Add a 'U' to mode to open the file for input with universal newline\n\
1363support. Any line ending in the input file will be seen as a '\\n' in\n\
1364Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1365for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1366'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1367newlines are available only when reading.\n\
1368")
1369#endif
1370;
1371
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001372static PyTypeObject BZ2File_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001373 PyObject_HEAD_INIT(NULL)
1374 0, /*ob_size*/
1375 "bz2.BZ2File", /*tp_name*/
1376 sizeof(BZ2FileObject), /*tp_basicsize*/
1377 0, /*tp_itemsize*/
1378 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1379 0, /*tp_print*/
1380 0, /*tp_getattr*/
1381 0, /*tp_setattr*/
1382 0, /*tp_compare*/
1383 0, /*tp_repr*/
1384 0, /*tp_as_number*/
1385 0, /*tp_as_sequence*/
1386 0, /*tp_as_mapping*/
1387 0, /*tp_hash*/
1388 0, /*tp_call*/
1389 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001390 PyObject_GenericGetAttr,/*tp_getattro*/
1391 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001392 0, /*tp_as_buffer*/
1393 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1394 BZ2File__doc__, /*tp_doc*/
1395 0, /*tp_traverse*/
1396 0, /*tp_clear*/
1397 0, /*tp_richcompare*/
1398 0, /*tp_weaklistoffset*/
1399 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1400 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1401 BZ2File_methods, /*tp_methods*/
1402 0, /*tp_members*/
1403 0, /*tp_getset*/
1404 0, /*tp_base*/
1405 0, /*tp_dict*/
1406 0, /*tp_descr_get*/
1407 0, /*tp_descr_set*/
1408 0, /*tp_dictoffset*/
1409 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001410 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001411 0, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001412 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001413 0, /*tp_is_gc*/
1414};
1415
1416
1417/* ===================================================================== */
1418/* Methods of BZ2Comp. */
1419
1420PyDoc_STRVAR(BZ2Comp_compress__doc__,
1421"compress(data) -> string\n\
1422\n\
1423Provide more data to the compressor object. It will return chunks of\n\
1424compressed data whenever possible. When you've finished providing data\n\
1425to compress, call the flush() method to finish the compression process,\n\
1426and return what is left in the internal buffers.\n\
1427");
1428
1429static PyObject *
1430BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1431{
1432 char *data;
1433 int datasize;
1434 int bufsize = SMALLCHUNK;
Tim Peters07f075c2002-11-09 04:26:02 +00001435 LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001436 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001437 bz_stream *bzs = &self->bzs;
1438 int bzerror;
1439
1440 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1441 return NULL;
1442
1443 ACQUIRE_LOCK(self);
1444 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001445 PyErr_SetString(PyExc_ValueError,
1446 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001447 goto error;
1448 }
1449
1450 ret = PyString_FromStringAndSize(NULL, bufsize);
1451 if (!ret)
1452 goto error;
1453
1454 bzs->next_in = data;
1455 bzs->avail_in = datasize;
1456 bzs->next_out = BUF(ret);
1457 bzs->avail_out = bufsize;
1458
1459 totalout = BZS_TOTAL_OUT(bzs);
1460
1461 for (;;) {
1462 Py_BEGIN_ALLOW_THREADS
1463 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1464 Py_END_ALLOW_THREADS
1465 if (bzerror != BZ_RUN_OK) {
1466 Util_CatchBZ2Error(bzerror);
1467 goto error;
1468 }
1469 if (bzs->avail_out == 0) {
1470 bufsize = Util_NewBufferSize(bufsize);
1471 if (_PyString_Resize(&ret, bufsize) < 0) {
1472 BZ2_bzCompressEnd(bzs);
1473 goto error;
1474 }
1475 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1476 - totalout);
1477 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1478 } else if (bzs->avail_in == 0) {
1479 break;
1480 }
1481 }
1482
Tim Petersf29f0c62002-11-09 04:28:17 +00001483 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001484
1485 RELEASE_LOCK(self);
1486 return ret;
1487
1488error:
1489 RELEASE_LOCK(self);
1490 Py_XDECREF(ret);
1491 return NULL;
1492}
1493
1494PyDoc_STRVAR(BZ2Comp_flush__doc__,
1495"flush() -> string\n\
1496\n\
1497Finish the compression process and return what is left in internal buffers.\n\
1498You must not use the compressor object after calling this method.\n\
1499");
1500
1501static PyObject *
1502BZ2Comp_flush(BZ2CompObject *self)
1503{
1504 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001505 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001506 bz_stream *bzs = &self->bzs;
Tim Peters2858e5e2002-11-09 04:30:08 +00001507 LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001508 int bzerror;
1509
1510 ACQUIRE_LOCK(self);
1511 if (!self->running) {
1512 PyErr_SetString(PyExc_ValueError, "object was already "
1513 "flushed");
1514 goto error;
1515 }
1516 self->running = 0;
1517
1518 ret = PyString_FromStringAndSize(NULL, bufsize);
1519 if (!ret)
1520 goto error;
1521
1522 bzs->next_out = BUF(ret);
1523 bzs->avail_out = bufsize;
1524
1525 totalout = BZS_TOTAL_OUT(bzs);
1526
1527 for (;;) {
1528 Py_BEGIN_ALLOW_THREADS
1529 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1530 Py_END_ALLOW_THREADS
1531 if (bzerror == BZ_STREAM_END) {
1532 break;
1533 } else if (bzerror != BZ_FINISH_OK) {
1534 Util_CatchBZ2Error(bzerror);
1535 goto error;
1536 }
1537 if (bzs->avail_out == 0) {
1538 bufsize = Util_NewBufferSize(bufsize);
1539 if (_PyString_Resize(&ret, bufsize) < 0)
1540 goto error;
1541 bzs->next_out = BUF(ret);
1542 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1543 - totalout);
1544 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1545 }
1546 }
1547
1548 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001549 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001550
1551 RELEASE_LOCK(self);
1552 return ret;
1553
1554error:
1555 RELEASE_LOCK(self);
1556 Py_XDECREF(ret);
1557 return NULL;
1558}
1559
1560static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001561 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1562 BZ2Comp_compress__doc__},
1563 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1564 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001565 {NULL, NULL} /* sentinel */
1566};
1567
1568
1569/* ===================================================================== */
1570/* Slot definitions for BZ2Comp_Type. */
1571
1572static int
1573BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1574{
1575 int compresslevel = 9;
1576 int bzerror;
1577 static char *kwlist[] = {"compresslevel", 0};
1578
1579 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1580 kwlist, &compresslevel))
1581 return -1;
1582
1583 if (compresslevel < 1 || compresslevel > 9) {
1584 PyErr_SetString(PyExc_ValueError,
1585 "compresslevel must be between 1 and 9");
1586 goto error;
1587 }
1588
1589#ifdef WITH_THREAD
1590 self->lock = PyThread_allocate_lock();
1591 if (!self->lock)
1592 goto error;
1593#endif
1594
1595 memset(&self->bzs, 0, sizeof(bz_stream));
1596 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1597 if (bzerror != BZ_OK) {
1598 Util_CatchBZ2Error(bzerror);
1599 goto error;
1600 }
1601
1602 self->running = 1;
1603
1604 return 0;
1605error:
1606#ifdef WITH_THREAD
1607 if (self->lock)
1608 PyThread_free_lock(self->lock);
1609#endif
1610 return -1;
1611}
1612
1613static void
1614BZ2Comp_dealloc(BZ2CompObject *self)
1615{
1616#ifdef WITH_THREAD
1617 if (self->lock)
1618 PyThread_free_lock(self->lock);
1619#endif
1620 BZ2_bzCompressEnd(&self->bzs);
1621 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1622}
1623
1624
1625/* ===================================================================== */
1626/* BZ2Comp_Type definition. */
1627
1628PyDoc_STRVAR(BZ2Comp__doc__,
1629"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1630\n\
1631Create a new compressor object. This object may be used to compress\n\
1632data sequentially. If you want to compress data in one shot, use the\n\
1633compress() function instead. The compresslevel parameter, if given,\n\
1634must be a number between 1 and 9.\n\
1635");
1636
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001637static PyTypeObject BZ2Comp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001638 PyObject_HEAD_INIT(NULL)
1639 0, /*ob_size*/
1640 "bz2.BZ2Compressor", /*tp_name*/
1641 sizeof(BZ2CompObject), /*tp_basicsize*/
1642 0, /*tp_itemsize*/
1643 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1644 0, /*tp_print*/
1645 0, /*tp_getattr*/
1646 0, /*tp_setattr*/
1647 0, /*tp_compare*/
1648 0, /*tp_repr*/
1649 0, /*tp_as_number*/
1650 0, /*tp_as_sequence*/
1651 0, /*tp_as_mapping*/
1652 0, /*tp_hash*/
1653 0, /*tp_call*/
1654 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001655 PyObject_GenericGetAttr,/*tp_getattro*/
1656 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001657 0, /*tp_as_buffer*/
1658 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1659 BZ2Comp__doc__, /*tp_doc*/
1660 0, /*tp_traverse*/
1661 0, /*tp_clear*/
1662 0, /*tp_richcompare*/
1663 0, /*tp_weaklistoffset*/
1664 0, /*tp_iter*/
1665 0, /*tp_iternext*/
1666 BZ2Comp_methods, /*tp_methods*/
1667 0, /*tp_members*/
1668 0, /*tp_getset*/
1669 0, /*tp_base*/
1670 0, /*tp_dict*/
1671 0, /*tp_descr_get*/
1672 0, /*tp_descr_set*/
1673 0, /*tp_dictoffset*/
1674 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001675 PyType_GenericAlloc, /*tp_alloc*/
1676 PyType_GenericNew, /*tp_new*/
1677 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001678 0, /*tp_is_gc*/
1679};
1680
1681
1682/* ===================================================================== */
1683/* Members of BZ2Decomp. */
1684
1685#define OFF(x) offsetof(BZ2DecompObject, x)
1686
1687static PyMemberDef BZ2Decomp_members[] = {
1688 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1689 {NULL} /* Sentinel */
1690};
1691
1692
1693/* ===================================================================== */
1694/* Methods of BZ2Decomp. */
1695
1696PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1697"decompress(data) -> string\n\
1698\n\
1699Provide more data to the decompressor object. It will return chunks\n\
1700of decompressed data whenever possible. If you try to decompress data\n\
1701after the end of stream is found, EOFError will be raised. If any data\n\
1702was found after the end of stream, it'll be ignored and saved in\n\
1703unused_data attribute.\n\
1704");
1705
1706static PyObject *
1707BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1708{
1709 char *data;
1710 int datasize;
1711 int bufsize = SMALLCHUNK;
Tim Peters39185d62002-11-09 04:31:38 +00001712 LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001713 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001714 bz_stream *bzs = &self->bzs;
1715 int bzerror;
1716
1717 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1718 return NULL;
1719
1720 ACQUIRE_LOCK(self);
1721 if (!self->running) {
1722 PyErr_SetString(PyExc_EOFError, "end of stream was "
1723 "already found");
1724 goto error;
1725 }
1726
1727 ret = PyString_FromStringAndSize(NULL, bufsize);
1728 if (!ret)
1729 goto error;
1730
1731 bzs->next_in = data;
1732 bzs->avail_in = datasize;
1733 bzs->next_out = BUF(ret);
1734 bzs->avail_out = bufsize;
1735
1736 totalout = BZS_TOTAL_OUT(bzs);
1737
1738 for (;;) {
1739 Py_BEGIN_ALLOW_THREADS
1740 bzerror = BZ2_bzDecompress(bzs);
1741 Py_END_ALLOW_THREADS
1742 if (bzerror == BZ_STREAM_END) {
1743 if (bzs->avail_in != 0) {
1744 Py_DECREF(self->unused_data);
1745 self->unused_data =
1746 PyString_FromStringAndSize(bzs->next_in,
1747 bzs->avail_in);
1748 }
1749 self->running = 0;
1750 break;
1751 }
1752 if (bzerror != BZ_OK) {
1753 Util_CatchBZ2Error(bzerror);
1754 goto error;
1755 }
1756 if (bzs->avail_out == 0) {
1757 bufsize = Util_NewBufferSize(bufsize);
1758 if (_PyString_Resize(&ret, bufsize) < 0) {
1759 BZ2_bzDecompressEnd(bzs);
1760 goto error;
1761 }
1762 bzs->next_out = BUF(ret);
1763 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1764 - totalout);
1765 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1766 } else if (bzs->avail_in == 0) {
1767 break;
1768 }
1769 }
1770
1771 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001772 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001773
1774 RELEASE_LOCK(self);
1775 return ret;
1776
1777error:
1778 RELEASE_LOCK(self);
1779 Py_XDECREF(ret);
1780 return NULL;
1781}
1782
1783static PyMethodDef BZ2Decomp_methods[] = {
1784 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1785 {NULL, NULL} /* sentinel */
1786};
1787
1788
1789/* ===================================================================== */
1790/* Slot definitions for BZ2Decomp_Type. */
1791
1792static int
1793BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1794{
1795 int bzerror;
1796
1797 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1798 return -1;
1799
1800#ifdef WITH_THREAD
1801 self->lock = PyThread_allocate_lock();
1802 if (!self->lock)
1803 goto error;
1804#endif
1805
1806 self->unused_data = PyString_FromString("");
1807 if (!self->unused_data)
1808 goto error;
1809
1810 memset(&self->bzs, 0, sizeof(bz_stream));
1811 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1812 if (bzerror != BZ_OK) {
1813 Util_CatchBZ2Error(bzerror);
1814 goto error;
1815 }
1816
1817 self->running = 1;
1818
1819 return 0;
1820
1821error:
1822#ifdef WITH_THREAD
1823 if (self->lock)
1824 PyThread_free_lock(self->lock);
1825#endif
1826 Py_XDECREF(self->unused_data);
1827 return -1;
1828}
1829
1830static void
1831BZ2Decomp_dealloc(BZ2DecompObject *self)
1832{
1833#ifdef WITH_THREAD
1834 if (self->lock)
1835 PyThread_free_lock(self->lock);
1836#endif
1837 Py_XDECREF(self->unused_data);
1838 BZ2_bzDecompressEnd(&self->bzs);
1839 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1840}
1841
1842
1843/* ===================================================================== */
1844/* BZ2Decomp_Type definition. */
1845
1846PyDoc_STRVAR(BZ2Decomp__doc__,
1847"BZ2Decompressor() -> decompressor object\n\
1848\n\
1849Create a new decompressor object. This object may be used to decompress\n\
1850data sequentially. If you want to decompress data in one shot, use the\n\
1851decompress() function instead.\n\
1852");
1853
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001854static PyTypeObject BZ2Decomp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001855 PyObject_HEAD_INIT(NULL)
1856 0, /*ob_size*/
1857 "bz2.BZ2Decompressor", /*tp_name*/
1858 sizeof(BZ2DecompObject), /*tp_basicsize*/
1859 0, /*tp_itemsize*/
1860 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1861 0, /*tp_print*/
1862 0, /*tp_getattr*/
1863 0, /*tp_setattr*/
1864 0, /*tp_compare*/
1865 0, /*tp_repr*/
1866 0, /*tp_as_number*/
1867 0, /*tp_as_sequence*/
1868 0, /*tp_as_mapping*/
1869 0, /*tp_hash*/
1870 0, /*tp_call*/
1871 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001872 PyObject_GenericGetAttr,/*tp_getattro*/
1873 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001874 0, /*tp_as_buffer*/
1875 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1876 BZ2Decomp__doc__, /*tp_doc*/
1877 0, /*tp_traverse*/
1878 0, /*tp_clear*/
1879 0, /*tp_richcompare*/
1880 0, /*tp_weaklistoffset*/
1881 0, /*tp_iter*/
1882 0, /*tp_iternext*/
1883 BZ2Decomp_methods, /*tp_methods*/
1884 BZ2Decomp_members, /*tp_members*/
1885 0, /*tp_getset*/
1886 0, /*tp_base*/
1887 0, /*tp_dict*/
1888 0, /*tp_descr_get*/
1889 0, /*tp_descr_set*/
1890 0, /*tp_dictoffset*/
1891 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001892 PyType_GenericAlloc, /*tp_alloc*/
1893 PyType_GenericNew, /*tp_new*/
1894 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001895 0, /*tp_is_gc*/
1896};
1897
1898
1899/* ===================================================================== */
1900/* Module functions. */
1901
1902PyDoc_STRVAR(bz2_compress__doc__,
1903"compress(data [, compresslevel=9]) -> string\n\
1904\n\
1905Compress data in one shot. If you want to compress data sequentially,\n\
1906use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1907given, must be a number between 1 and 9.\n\
1908");
1909
1910static PyObject *
1911bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1912{
1913 int compresslevel=9;
1914 char *data;
1915 int datasize;
1916 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001917 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001918 bz_stream _bzs;
1919 bz_stream *bzs = &_bzs;
1920 int bzerror;
1921 static char *kwlist[] = {"data", "compresslevel", 0};
1922
1923 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1924 kwlist, &data, &datasize,
1925 &compresslevel))
1926 return NULL;
1927
1928 if (compresslevel < 1 || compresslevel > 9) {
1929 PyErr_SetString(PyExc_ValueError,
1930 "compresslevel must be between 1 and 9");
1931 return NULL;
1932 }
1933
1934 /* Conforming to bz2 manual, this is large enough to fit compressed
1935 * data in one shot. We will check it later anyway. */
1936 bufsize = datasize + (datasize/100+1) + 600;
1937
1938 ret = PyString_FromStringAndSize(NULL, bufsize);
1939 if (!ret)
1940 return NULL;
1941
1942 memset(bzs, 0, sizeof(bz_stream));
1943
1944 bzs->next_in = data;
1945 bzs->avail_in = datasize;
1946 bzs->next_out = BUF(ret);
1947 bzs->avail_out = bufsize;
1948
1949 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1950 if (bzerror != BZ_OK) {
1951 Util_CatchBZ2Error(bzerror);
1952 Py_DECREF(ret);
1953 return NULL;
1954 }
Tim Peterse3228092002-11-09 04:21:44 +00001955
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001956 for (;;) {
1957 Py_BEGIN_ALLOW_THREADS
1958 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1959 Py_END_ALLOW_THREADS
1960 if (bzerror == BZ_STREAM_END) {
1961 break;
1962 } else if (bzerror != BZ_FINISH_OK) {
1963 BZ2_bzCompressEnd(bzs);
1964 Util_CatchBZ2Error(bzerror);
1965 Py_DECREF(ret);
1966 return NULL;
1967 }
1968 if (bzs->avail_out == 0) {
1969 bufsize = Util_NewBufferSize(bufsize);
1970 if (_PyString_Resize(&ret, bufsize) < 0) {
1971 BZ2_bzCompressEnd(bzs);
1972 Py_DECREF(ret);
1973 return NULL;
1974 }
1975 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1976 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1977 }
1978 }
1979
1980 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00001981 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001982 BZ2_bzCompressEnd(bzs);
1983
1984 return ret;
1985}
1986
1987PyDoc_STRVAR(bz2_decompress__doc__,
1988"decompress(data) -> decompressed data\n\
1989\n\
1990Decompress data in one shot. If you want to decompress data sequentially,\n\
1991use an instance of BZ2Decompressor instead.\n\
1992");
1993
1994static PyObject *
1995bz2_decompress(PyObject *self, PyObject *args)
1996{
1997 char *data;
1998 int datasize;
1999 int bufsize = SMALLCHUNK;
2000 PyObject *ret;
2001 bz_stream _bzs;
2002 bz_stream *bzs = &_bzs;
2003 int bzerror;
2004
2005 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2006 return NULL;
2007
2008 if (datasize == 0)
2009 return PyString_FromString("");
2010
2011 ret = PyString_FromStringAndSize(NULL, bufsize);
2012 if (!ret)
2013 return NULL;
2014
2015 memset(bzs, 0, sizeof(bz_stream));
2016
2017 bzs->next_in = data;
2018 bzs->avail_in = datasize;
2019 bzs->next_out = BUF(ret);
2020 bzs->avail_out = bufsize;
2021
2022 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2023 if (bzerror != BZ_OK) {
2024 Util_CatchBZ2Error(bzerror);
2025 Py_DECREF(ret);
2026 return NULL;
2027 }
Tim Peterse3228092002-11-09 04:21:44 +00002028
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002029 for (;;) {
2030 Py_BEGIN_ALLOW_THREADS
2031 bzerror = BZ2_bzDecompress(bzs);
2032 Py_END_ALLOW_THREADS
2033 if (bzerror == BZ_STREAM_END) {
2034 break;
2035 } else if (bzerror != BZ_OK) {
2036 BZ2_bzDecompressEnd(bzs);
2037 Util_CatchBZ2Error(bzerror);
2038 Py_DECREF(ret);
2039 return NULL;
2040 }
2041 if (bzs->avail_out == 0) {
2042 bufsize = Util_NewBufferSize(bufsize);
2043 if (_PyString_Resize(&ret, bufsize) < 0) {
2044 BZ2_bzDecompressEnd(bzs);
2045 Py_DECREF(ret);
2046 return NULL;
2047 }
2048 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2049 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2050 } else if (bzs->avail_in == 0) {
2051 BZ2_bzDecompressEnd(bzs);
2052 PyErr_SetString(PyExc_ValueError,
2053 "couldn't find end of stream");
2054 Py_DECREF(ret);
2055 return NULL;
2056 }
2057 }
2058
2059 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002060 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002061 BZ2_bzDecompressEnd(bzs);
2062
2063 return ret;
2064}
2065
2066static PyMethodDef bz2_methods[] = {
2067 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2068 bz2_compress__doc__},
2069 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2070 bz2_decompress__doc__},
2071 {NULL, NULL} /* sentinel */
2072};
2073
2074/* ===================================================================== */
2075/* Initialization function. */
2076
2077PyDoc_STRVAR(bz2__doc__,
2078"The python bz2 module provides a comprehensive interface for\n\
2079the bz2 compression library. It implements a complete file\n\
2080interface, one shot (de)compression functions, and types for\n\
2081sequential (de)compression.\n\
2082");
2083
2084DL_EXPORT(void)
2085initbz2(void)
2086{
2087 PyObject *m;
2088
2089 BZ2File_Type.ob_type = &PyType_Type;
2090 BZ2File_Type.tp_base = &PyFile_Type;
2091 BZ2File_Type.tp_new = PyFile_Type.tp_new;
2092
2093 BZ2Comp_Type.ob_type = &PyType_Type;
2094 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002095
2096 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2097
2098 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2099
2100 Py_INCREF(&BZ2File_Type);
2101 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2102
2103 Py_INCREF(&BZ2Comp_Type);
2104 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2105
2106 Py_INCREF(&BZ2Decomp_Type);
2107 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2108}