/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
10#include <stdio.h>
11#include <bzlib.h>
12#include "Python.h"
13#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Character buffer of a Python string object (no type check is done). */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2     /* reading, end of stream already reached */
#define MODE_WRITE    3

#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)

/* Total number of output bytes recorded in a bz_stream.  The 64-bit value
 * is assembled from its two 32-bit halves when a wide enough integer type
 * exists; otherwise only the low half is reported.  Every variant expands
 * to a single parenthesized expression so it can be used inside larger
 * expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

/* Per-object locking; expands to nothing in builds without thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

#ifdef WITH_UNIVERSAL_NEWLINES
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0   /* No newline seen, yet */
#define NEWLINE_CR      1   /* \r newline seen */
#define NEWLINE_LF      2   /* \n newline seen */
#define NEWLINE_CRLF    4   /* \r\n newline seen */
#endif
60
61/* ===================================================================== */
62/* Structure definitions. */
63
64typedef struct {
65 PyFileObject file;
66 BZFILE *fp;
67 int mode;
68 long pos;
69 long size;
70#ifdef WITH_THREAD
71 PyThread_type_lock lock;
72#endif
73} BZ2FileObject;
74
75typedef struct {
76 PyObject_HEAD
77 bz_stream bzs;
78 int running;
79#ifdef WITH_THREAD
80 PyThread_type_lock lock;
81#endif
82} BZ2CompObject;
83
84typedef struct {
85 PyObject_HEAD
86 bz_stream bzs;
87 int running;
88 PyObject *unused_data;
89#ifdef WITH_THREAD
90 PyThread_type_lock lock;
91#endif
92} BZ2DecompObject;
93
94/* ===================================================================== */
95/* Utility functions. */
96
97static int
98Util_CatchBZ2Error(int bzerror)
99{
100 int ret = 0;
101 switch(bzerror) {
102 case BZ_OK:
103 case BZ_STREAM_END:
104 break;
105
106 case BZ_CONFIG_ERROR:
107 PyErr_SetString(PyExc_SystemError,
108 "the bz2 library was not compiled "
109 "correctly");
110 ret = 1;
111 break;
112
113 case BZ_PARAM_ERROR:
114 PyErr_SetString(PyExc_ValueError,
115 "the bz2 library has received wrong "
116 "parameters");
117 ret = 1;
118 break;
119
120 case BZ_MEM_ERROR:
121 PyErr_NoMemory();
122 ret = 1;
123 break;
124
125 case BZ_DATA_ERROR:
126 case BZ_DATA_ERROR_MAGIC:
127 PyErr_SetString(PyExc_IOError, "invalid data stream");
128 ret = 1;
129 break;
130
131 case BZ_IO_ERROR:
132 PyErr_SetString(PyExc_IOError, "unknown IO error");
133 ret = 1;
134 break;
135
136 case BZ_UNEXPECTED_EOF:
137 PyErr_SetString(PyExc_EOFError,
138 "compressed file ended before the "
139 "logical end-of-stream was detected");
140 ret = 1;
141 break;
142
143 case BZ_SEQUENCE_ERROR:
144 PyErr_SetString(PyExc_RuntimeError,
145 "wrong sequence of bz2 library "
146 "commands used");
147 ret = 1;
148 break;
149 }
150 return ret;
151}
152
/* SMALLCHUNK is the larger of BUFSIZ and 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK is 512K, or 16K when int is narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK  (512 * 1024)
#else
#define BIGCHUNK  (512 * 32)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to use after `currentsize`:
 *   - up to SMALLCHUNK, grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK, double;
 *   - above BIGCHUNK, grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;

    return currentsize + growth;
}
179
180/* This is a hacked version of Python's fileobject.c:get_line(). */
181static PyObject *
182Util_GetLine(BZ2FileObject *self, int n)
183{
184 char c;
185 char *buf, *end;
186 size_t total_v_size; /* total # of slots in buffer */
187 size_t used_v_size; /* # used slots in buffer */
188 size_t increment; /* amount to increment the buffer */
189 PyObject *v;
190 int bzerror;
191#ifdef WITH_UNIVERSAL_NEWLINES
192 int newlinetypes = ((PyFileObject*)self)->f_newlinetypes;
193 int skipnextlf = ((PyFileObject*)self)->f_skipnextlf;
194 int univ_newline = ((PyFileObject*)self)->f_univ_newline;
195#endif
196
197 total_v_size = n > 0 ? n : 100;
198 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
199 if (v == NULL)
200 return NULL;
201
202 buf = BUF(v);
203 end = buf + total_v_size;
204
205 for (;;) {
206 Py_BEGIN_ALLOW_THREADS
207#ifdef WITH_UNIVERSAL_NEWLINES
208 if (univ_newline) {
209 while (1) {
210 BZ2_bzRead(&bzerror, self->fp, &c, 1);
211 self->pos++;
212 if (bzerror != BZ_OK || buf == end)
213 break;
214 if (skipnextlf ) {
215 skipnextlf = 0;
216 if (c == '\n') {
217 /* Seeing a \n here with
218 * skipnextlf true means we
219 * saw a \r before.
220 */
221 newlinetypes |= NEWLINE_CRLF;
222 BZ2_bzRead(&bzerror, self->fp,
223 &c, 1);
224 if (bzerror != BZ_OK)
225 break;
226 } else {
227 newlinetypes |= NEWLINE_CR;
228 }
229 }
230 if (c == '\r') {
231 skipnextlf = 1;
232 c = '\n';
233 } else if ( c == '\n')
234 newlinetypes |= NEWLINE_LF;
235 *buf++ = c;
236 if (c == '\n') break;
237 }
238 if (bzerror == BZ_STREAM_END && skipnextlf)
239 newlinetypes |= NEWLINE_CR;
240 } else /* If not universal newlines use the normal loop */
241#endif
242 do {
243 BZ2_bzRead(&bzerror, self->fp, &c, 1);
244 self->pos++;
245 *buf++ = c;
246 } while (bzerror == BZ_OK && c != '\n' && buf != end);
247 Py_END_ALLOW_THREADS
248#ifdef WITH_UNIVERSAL_NEWLINES
249 ((PyFileObject*)self)->f_newlinetypes = newlinetypes;
250 ((PyFileObject*)self)->f_skipnextlf = skipnextlf;
251#endif
252 if (bzerror == BZ_STREAM_END) {
253 self->size = self->pos;
254 self->mode = MODE_READ_EOF;
255 break;
256 } else if (bzerror != BZ_OK) {
257 Util_CatchBZ2Error(bzerror);
258 Py_DECREF(v);
259 return NULL;
260 }
261 if (c == '\n')
262 break;
263 /* Must be because buf == end */
264 if (n > 0)
265 break;
266 used_v_size = total_v_size;
267 increment = total_v_size >> 2; /* mild exponential growth */
268 total_v_size += increment;
269 if (total_v_size > INT_MAX) {
270 PyErr_SetString(PyExc_OverflowError,
271 "line is longer than a Python string can hold");
272 Py_DECREF(v);
273 return NULL;
274 }
275 if (_PyString_Resize(&v, total_v_size) < 0)
276 return NULL;
277 buf = BUF(v) + used_v_size;
278 end = BUF(v) + total_v_size;
279 }
280
281 used_v_size = buf - BUF(v);
282 if (used_v_size != total_v_size)
283 _PyString_Resize(&v, used_v_size);
284 return v;
285}
286
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* This is a hacked version of Python's
 * fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n decompressed bytes from `stream` into `buf`.  When the file
 * object has universal newlines enabled, "\r" and "\r\n" are stored as a
 * single "\n" and the newline bookkeeping in the PyFileObject is updated;
 * otherwise this is a plain BZ2_bzRead().
 *
 *   bzerror  receives the libbz2 status of the last read performed
 *            (BZ_OK when n is 0 and no read was needed).
 *
 * Returns the number of bytes stored in buf.  The stream is never read
 * again once libbz2 has reported end of stream or an error, even if
 * collapsing CR LF pairs freed up room in the buffer. */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
                     char* buf, size_t n, BZ2FileObject *fobj)
{
    char *dst = buf;
    PyFileObject *f = (PyFileObject *)fobj;
    int newlinetypes, skipnextlf;

    assert(buf != NULL);
    assert(stream != NULL);

    if (!f->f_univ_newline)
        return BZ2_bzRead(bzerror, stream, buf, n);

    newlinetypes = f->f_newlinetypes;
    skipnextlf = f->f_skipnextlf;

    /* Callers inspect *bzerror unconditionally, so give it a defined
     * value even when the loop below does not run (n == 0). */
    *bzerror = BZ_OK;

    /* Invariant: n is the number of bytes remaining to be filled
     * in the buffer.
     */
    while (n) {
        size_t nread;
        int stop;
        char *src = dst;

        nread = BZ2_bzRead(bzerror, stream, dst, n);
        assert(nread <= n);
        n -= nread; /* assuming 1 byte out for each in; will adjust */
        /* Stop on a short read, and also when the request was filled
         * exactly but the stream ended or failed at the same time. */
        stop = (n != 0) || (*bzerror != BZ_OK);
        while (nread--) {
            char c = *src++;
            if (c == '\r') {
                /* Save as LF and set flag to skip next LF. */
                *dst++ = '\n';
                skipnextlf = 1;
            }
            else if (skipnextlf && c == '\n') {
                /* Skip LF, and remember we saw CR LF. */
                skipnextlf = 0;
                newlinetypes |= NEWLINE_CRLF;
                ++n;
            }
            else {
                /* Normal char to be stored in buffer.  Also
                 * update the newlinetypes flag if either this
                 * is an LF or the previous char was a CR.
                 */
                if (c == '\n')
                    newlinetypes |= NEWLINE_LF;
                else if (skipnextlf)
                    newlinetypes |= NEWLINE_CR;
                *dst++ = c;
                skipnextlf = 0;
            }
        }
        if (stop) {
            /* If this is EOF, update type flags. */
            if (skipnextlf && *bzerror == BZ_STREAM_END)
                newlinetypes |= NEWLINE_CR;
            break;
        }
    }
    f->f_newlinetypes = newlinetypes;
    f->f_skipnextlf = skipnextlf;
    return dst - buf;
}
#endif
359
360/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
361static void
362Util_DropReadAhead(BZ2FileObject *self)
363{
364 PyFileObject *f = (PyFileObject*)self;
365 if (f->f_buf != NULL) {
366 PyMem_Free(f->f_buf);
367 f->f_buf = NULL;
368 }
369}
370
371/* This is a hacked version of Python's fileobject.c:readahead(). */
372static int
373Util_ReadAhead(BZ2FileObject *self, int bufsize)
374{
375 int chunksize;
376 int bzerror;
377 PyFileObject *f = (PyFileObject*)self;
378
379 if (f->f_buf != NULL) {
380 if((f->f_bufend - f->f_bufptr) >= 1)
381 return 0;
382 else
383 Util_DropReadAhead(self);
384 }
385 if (self->mode == MODE_READ_EOF) {
386 return -1;
387 }
388 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
389 return -1;
390 }
391 Py_BEGIN_ALLOW_THREADS
392 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, f->f_buf,
393 bufsize, self);
394 Py_END_ALLOW_THREADS
395 self->pos += chunksize;
396 if (bzerror == BZ_STREAM_END) {
397 self->size = self->pos;
398 self->mode = MODE_READ_EOF;
399 } else if (bzerror != BZ_OK) {
400 Util_CatchBZ2Error(bzerror);
401 Util_DropReadAhead(self);
402 return -1;
403 }
404 f->f_bufptr = f->f_buf;
405 f->f_bufend = f->f_buf + chunksize;
406 return 0;
407}
408
409/* This is a hacked version of Python's
410 * fileobject.c:readahead_get_line_skip(). */
411static PyStringObject *
412Util_ReadAheadGetLineSkip(BZ2FileObject *bf, int skip, int bufsize)
413{
414 PyFileObject *f = (PyFileObject*)bf;
415 PyStringObject* s;
416 char *bufptr;
417 char *buf;
418 int len;
419
420 if (f->f_buf == NULL)
421 if (Util_ReadAhead(bf, bufsize) < 0)
422 return NULL;
423
424 len = f->f_bufend - f->f_bufptr;
425 if (len == 0)
426 return (PyStringObject *)
427 PyString_FromStringAndSize(NULL, skip);
428 bufptr = memchr(f->f_bufptr, '\n', len);
429 if (bufptr != NULL) {
430 bufptr++; /* Count the '\n' */
431 len = bufptr - f->f_bufptr;
432 s = (PyStringObject *)
433 PyString_FromStringAndSize(NULL, skip+len);
434 if (s == NULL)
435 return NULL;
436 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
437 f->f_bufptr = bufptr;
438 if (bufptr == f->f_bufend)
439 Util_DropReadAhead(bf);
440 } else {
441 bufptr = f->f_bufptr;
442 buf = f->f_buf;
443 f->f_buf = NULL; /* Force new readahead buffer */
444 s = Util_ReadAheadGetLineSkip(
445 bf, skip+len, bufsize + (bufsize>>2) );
446 if (s == NULL) {
447 PyMem_Free(buf);
448 return NULL;
449 }
450 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
451 PyMem_Free(buf);
452 }
453 return s;
454}
455
456/* ===================================================================== */
457/* Methods of BZ2File. */
458
459PyDoc_STRVAR(BZ2File_read__doc__,
460"read([size]) -> string\n\
461\n\
462Read at most size uncompressed bytes, returned as a string. If the size\n\
463argument is negative or omitted, read until EOF is reached.\n\
464");
465
466/* This is a hacked version of Python's fileobject.c:file_read(). */
467static PyObject *
468BZ2File_read(BZ2FileObject *self, PyObject *args)
469{
470 long bytesrequested = -1;
471 size_t bytesread, buffersize, chunksize;
472 int bzerror;
473 PyObject *ret = NULL;
474
475 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
476 return NULL;
477
478 ACQUIRE_LOCK(self);
479 switch (self->mode) {
480 case MODE_READ:
481 break;
482 case MODE_READ_EOF:
483 ret = PyString_FromString("");
484 goto cleanup;
485 case MODE_CLOSED:
486 PyErr_SetString(PyExc_ValueError,
487 "I/O operation on closed file");
488 goto cleanup;
489 default:
490 PyErr_SetString(PyExc_IOError,
491 "file is not ready for reading");
492 goto cleanup;
493 }
494
495 if (bytesrequested < 0)
496 buffersize = Util_NewBufferSize((size_t)0);
497 else
498 buffersize = bytesrequested;
499 if (buffersize > INT_MAX) {
500 PyErr_SetString(PyExc_OverflowError,
501 "requested number of bytes is more than a Python string can hold");
502 goto cleanup;
503 }
504 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
505 if (ret == NULL)
506 goto cleanup;
507 bytesread = 0;
508
509 for (;;) {
510 Py_BEGIN_ALLOW_THREADS
511 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
512 BUF(ret)+bytesread,
513 buffersize-bytesread,
514 self);
515 self->pos += chunksize;
516 Py_END_ALLOW_THREADS
517 bytesread += chunksize;
518 if (bzerror == BZ_STREAM_END) {
519 self->size = self->pos;
520 self->mode = MODE_READ_EOF;
521 break;
522 } else if (bzerror != BZ_OK) {
523 Util_CatchBZ2Error(bzerror);
524 Py_DECREF(ret);
525 ret = NULL;
526 goto cleanup;
527 }
528 if (bytesrequested < 0) {
529 buffersize = Util_NewBufferSize(buffersize);
530 if (_PyString_Resize(&ret, buffersize) < 0)
531 goto cleanup;
532 } else {
533 break;
534 }
535 }
536 if (bytesread != buffersize)
537 _PyString_Resize(&ret, bytesread);
538
539cleanup:
540 RELEASE_LOCK(self);
541 return ret;
542}
543
544PyDoc_STRVAR(BZ2File_readline__doc__,
545"readline([size]) -> string\n\
546\n\
547Return the next line from the file, as a string, retaining newline.\n\
548A non-negative size argument will limit the maximum number of bytes to\n\
549return (an incomplete line may be returned then). Return an empty\n\
550string at EOF.\n\
551");
552
553static PyObject *
554BZ2File_readline(BZ2FileObject *self, PyObject *args)
555{
556 PyObject *ret = NULL;
557 int sizehint = -1;
558
559 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
560 return NULL;
561
562 ACQUIRE_LOCK(self);
563 switch (self->mode) {
564 case MODE_READ:
565 break;
566 case MODE_READ_EOF:
567 ret = PyString_FromString("");
568 goto cleanup;
569 case MODE_CLOSED:
570 PyErr_SetString(PyExc_ValueError,
571 "I/O operation on closed file");
572 goto cleanup;
573 default:
574 PyErr_SetString(PyExc_IOError,
575 "file is not ready for reading");
576 goto cleanup;
577 }
578
579 if (sizehint == 0)
580 ret = PyString_FromString("");
581 else
582 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
583
584cleanup:
585 RELEASE_LOCK(self);
586 return ret;
587}
588
589PyDoc_STRVAR(BZ2File_readlines__doc__,
590"readlines([size]) -> list\n\
591\n\
592Call readline() repeatedly and return a list of lines read.\n\
593The optional size argument, if given, is an approximate bound on the\n\
594total number of bytes in the lines returned.\n\
595");
596
597/* This is a hacked version of Python's fileobject.c:file_readlines(). */
598static PyObject *
599BZ2File_readlines(BZ2FileObject *self, PyObject *args)
600{
601 long sizehint = 0;
602 PyObject *list = NULL;
603 PyObject *line;
604 char small_buffer[SMALLCHUNK];
605 char *buffer = small_buffer;
606 size_t buffersize = SMALLCHUNK;
607 PyObject *big_buffer = NULL;
608 size_t nfilled = 0;
609 size_t nread;
610 size_t totalread = 0;
611 char *p, *q, *end;
612 int err;
613 int shortread = 0;
614 int bzerror;
615
616 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
617 return NULL;
618
619 ACQUIRE_LOCK(self);
620 switch (self->mode) {
621 case MODE_READ:
622 break;
623 case MODE_READ_EOF:
624 list = PyList_New(0);
625 goto cleanup;
626 case MODE_CLOSED:
627 PyErr_SetString(PyExc_ValueError,
628 "I/O operation on closed file");
629 goto cleanup;
630 default:
631 PyErr_SetString(PyExc_IOError,
632 "file is not ready for reading");
633 goto cleanup;
634 }
635
636 if ((list = PyList_New(0)) == NULL)
637 goto cleanup;
638
639 for (;;) {
640 Py_BEGIN_ALLOW_THREADS
641 nread = Util_UnivNewlineRead(&bzerror, self->fp,
642 buffer+nfilled,
643 buffersize-nfilled, self);
644 self->pos += nread;
645 Py_END_ALLOW_THREADS
646 if (bzerror == BZ_STREAM_END) {
647 self->size = self->pos;
648 self->mode = MODE_READ_EOF;
649 if (nread == 0) {
650 sizehint = 0;
651 break;
652 }
653 shortread = 1;
654 } else if (bzerror != BZ_OK) {
655 Util_CatchBZ2Error(bzerror);
656 error:
657 Py_DECREF(list);
658 list = NULL;
659 goto cleanup;
660 }
661 totalread += nread;
662 p = memchr(buffer+nfilled, '\n', nread);
663 if (p == NULL) {
664 /* Need a larger buffer to fit this line */
665 nfilled += nread;
666 buffersize *= 2;
667 if (buffersize > INT_MAX) {
668 PyErr_SetString(PyExc_OverflowError,
669 "line is longer than a Python string can hold");
670 goto error;
671 }
672 if (big_buffer == NULL) {
673 /* Create the big buffer */
674 big_buffer = PyString_FromStringAndSize(
675 NULL, buffersize);
676 if (big_buffer == NULL)
677 goto error;
678 buffer = PyString_AS_STRING(big_buffer);
679 memcpy(buffer, small_buffer, nfilled);
680 }
681 else {
682 /* Grow the big buffer */
683 _PyString_Resize(&big_buffer, buffersize);
684 buffer = PyString_AS_STRING(big_buffer);
685 }
686 continue;
687 }
688 end = buffer+nfilled+nread;
689 q = buffer;
690 do {
691 /* Process complete lines */
692 p++;
693 line = PyString_FromStringAndSize(q, p-q);
694 if (line == NULL)
695 goto error;
696 err = PyList_Append(list, line);
697 Py_DECREF(line);
698 if (err != 0)
699 goto error;
700 q = p;
701 p = memchr(q, '\n', end-q);
702 } while (p != NULL);
703 /* Move the remaining incomplete line to the start */
704 nfilled = end-q;
705 memmove(buffer, q, nfilled);
706 if (sizehint > 0)
707 if (totalread >= (size_t)sizehint)
708 break;
709 if (shortread) {
710 sizehint = 0;
711 break;
712 }
713 }
714 if (nfilled != 0) {
715 /* Partial last line */
716 line = PyString_FromStringAndSize(buffer, nfilled);
717 if (line == NULL)
718 goto error;
719 if (sizehint > 0) {
720 /* Need to complete the last line */
721 PyObject *rest = Util_GetLine(self, 0);
722 if (rest == NULL) {
723 Py_DECREF(line);
724 goto error;
725 }
726 PyString_Concat(&line, rest);
727 Py_DECREF(rest);
728 if (line == NULL)
729 goto error;
730 }
731 err = PyList_Append(list, line);
732 Py_DECREF(line);
733 if (err != 0)
734 goto error;
735 }
736
737 cleanup:
738 RELEASE_LOCK(self);
739 if (big_buffer) {
740 Py_DECREF(big_buffer);
741 }
742 return list;
743}
744
745PyDoc_STRVAR(BZ2File_write__doc__,
746"write(data) -> None\n\
747\n\
748Write the 'data' string to file. Note that due to buffering, close() may\n\
749be needed before the file on disk reflects the data written.\n\
750");
751
752/* This is a hacked version of Python's fileobject.c:file_write(). */
753static PyObject *
754BZ2File_write(BZ2FileObject *self, PyObject *args)
755{
756 PyObject *ret = NULL;
757 char *buf;
758 int len;
759 int bzerror;
760
761 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
762 return NULL;
763
764 ACQUIRE_LOCK(self);
765 switch (self->mode) {
766 case MODE_WRITE:
767 break;
768
769 case MODE_CLOSED:
770 PyErr_SetString(PyExc_ValueError,
771 "I/O operation on closed file");
772 goto cleanup;;
773
774 default:
775 PyErr_SetString(PyExc_IOError,
776 "file is not ready for writing");
777 goto cleanup;;
778 }
779
780 PyFile_SoftSpace((PyObject*)self, 0);
781
782 Py_BEGIN_ALLOW_THREADS
783 BZ2_bzWrite (&bzerror, self->fp, buf, len);
784 self->pos += len;
785 Py_END_ALLOW_THREADS
786
787 if (bzerror != BZ_OK) {
788 Util_CatchBZ2Error(bzerror);
789 goto cleanup;
790 }
791
792 Py_INCREF(Py_None);
793 ret = Py_None;
794
795cleanup:
796 RELEASE_LOCK(self);
797 return ret;
798}
799
800PyDoc_STRVAR(BZ2File_writelines__doc__,
801"writelines(sequence_of_strings) -> None\n\
802\n\
803Write the sequence of strings to the file. Note that newlines are not\n\
804added. The sequence can be any iterable object producing strings. This is\n\
805equivalent to calling write() for each string.\n\
806");
807
808/* This is a hacked version of Python's fileobject.c:file_writelines(). */
809static PyObject *
810BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
811{
812#define CHUNKSIZE 1000
813 PyObject *list = NULL;
814 PyObject *iter = NULL;
815 PyObject *ret = NULL;
816 PyObject *line;
817 int i, j, index, len, islist;
818 int bzerror;
819
820 ACQUIRE_LOCK(self);
821 islist = PyList_Check(seq);
822 if (!islist) {
823 iter = PyObject_GetIter(seq);
824 if (iter == NULL) {
825 PyErr_SetString(PyExc_TypeError,
826 "writelines() requires an iterable argument");
827 goto error;
828 }
829 list = PyList_New(CHUNKSIZE);
830 if (list == NULL)
831 goto error;
832 }
833
834 /* Strategy: slurp CHUNKSIZE lines into a private list,
835 checking that they are all strings, then write that list
836 without holding the interpreter lock, then come back for more. */
837 for (index = 0; ; index += CHUNKSIZE) {
838 if (islist) {
839 Py_XDECREF(list);
840 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
841 if (list == NULL)
842 goto error;
843 j = PyList_GET_SIZE(list);
844 }
845 else {
846 for (j = 0; j < CHUNKSIZE; j++) {
847 line = PyIter_Next(iter);
848 if (line == NULL) {
849 if (PyErr_Occurred())
850 goto error;
851 break;
852 }
853 PyList_SetItem(list, j, line);
854 }
855 }
856 if (j == 0)
857 break;
858
859 /* Check that all entries are indeed strings. If not,
860 apply the same rules as for file.write() and
861 convert the rets to strings. This is slow, but
862 seems to be the only way since all conversion APIs
863 could potentially execute Python code. */
864 for (i = 0; i < j; i++) {
865 PyObject *v = PyList_GET_ITEM(list, i);
866 if (!PyString_Check(v)) {
867 const char *buffer;
868 int len;
869 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
870 PyErr_SetString(PyExc_TypeError,
871 "writelines() "
872 "argument must be "
873 "a sequence of "
874 "strings");
875 goto error;
876 }
877 line = PyString_FromStringAndSize(buffer,
878 len);
879 if (line == NULL)
880 goto error;
881 Py_DECREF(v);
882 PyList_SET_ITEM(list, i, line);
883 }
884 }
885
886 PyFile_SoftSpace((PyObject*)self, 0);
887
888 /* Since we are releasing the global lock, the
889 following code may *not* execute Python code. */
890 Py_BEGIN_ALLOW_THREADS
891 for (i = 0; i < j; i++) {
892 line = PyList_GET_ITEM(list, i);
893 len = PyString_GET_SIZE(line);
894 BZ2_bzWrite (&bzerror, self->fp,
895 PyString_AS_STRING(line), len);
896 if (bzerror != BZ_OK) {
897 Py_BLOCK_THREADS
898 Util_CatchBZ2Error(bzerror);
899 goto error;
900 }
901 }
902 Py_END_ALLOW_THREADS
903
904 if (j < CHUNKSIZE)
905 break;
906 }
907
908 Py_INCREF(Py_None);
909 ret = Py_None;
910
911 error:
912 RELEASE_LOCK(self);
913 Py_XDECREF(list);
914 Py_XDECREF(iter);
915 return ret;
916#undef CHUNKSIZE
917}
918
919PyDoc_STRVAR(BZ2File_seek__doc__,
920"seek(offset [, whence]) -> None\n\
921\n\
922Move to new file position. Argument offset is a byte count. Optional\n\
923argument whence defaults to 0 (offset from start of file, offset\n\
924should be >= 0); other values are 1 (move relative to current position,\n\
925positive or negative), and 2 (move relative to end of file, usually\n\
926negative, although many platforms allow seeking beyond the end of a file).\n\
927\n\
928Note that seeking of bz2 files is emulated, and depending on the parameters\n\
929the operation may be extremely slow.\n\
930");
931
932static PyObject *
933BZ2File_seek(BZ2FileObject *self, PyObject *args)
934{
935 int where = 0;
936 long offset;
937 char small_buffer[SMALLCHUNK];
938 char *buffer = small_buffer;
939 size_t buffersize = SMALLCHUNK;
940 int bytesread = 0;
941 int readsize;
942 int chunksize;
943 int bzerror;
944 int rewind = 0;
945 PyObject *func;
946 PyObject *ret = NULL;
947
948 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
949 return NULL;
950
951 ACQUIRE_LOCK(self);
952 Util_DropReadAhead(self);
953 switch (self->mode) {
954 case MODE_READ:
955 case MODE_READ_EOF:
956 break;
957
958 case MODE_CLOSED:
959 PyErr_SetString(PyExc_ValueError,
960 "I/O operation on closed file");
961 goto cleanup;;
962
963 default:
964 PyErr_SetString(PyExc_IOError,
965 "seek works only while reading");
966 goto cleanup;;
967 }
968
969 if (offset < 0) {
970 if (where == 1) {
971 offset = self->pos + offset;
972 rewind = 1;
973 } else if (where == 2) {
974 if (self->size == -1) {
975 assert(self->mode != MODE_READ_EOF);
976 for (;;) {
977 Py_BEGIN_ALLOW_THREADS
978 chunksize = Util_UnivNewlineRead(
979 &bzerror, self->fp,
980 buffer, buffersize,
981 self);
982 self->pos += chunksize;
983 Py_END_ALLOW_THREADS
984
985 bytesread += chunksize;
986 if (bzerror == BZ_STREAM_END) {
987 break;
988 } else if (bzerror != BZ_OK) {
989 Util_CatchBZ2Error(bzerror);
990 goto cleanup;
991 }
992 }
993 self->mode = MODE_READ_EOF;
994 self->size = self->pos;
995 bytesread = 0;
996 }
997 offset = self->size + offset;
998 if (offset >= self->pos)
999 offset -= self->pos;
1000 else
1001 rewind = 1;
1002 }
1003 if (offset < 0)
1004 offset = 0;
1005 } else if (where == 0) {
1006 if (offset >= self->pos)
1007 offset -= self->pos;
1008 else
1009 rewind = 1;
1010 }
1011
1012 if (rewind) {
1013 BZ2_bzReadClose(&bzerror, self->fp);
1014 func = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1015 "seek");
1016 if (bzerror != BZ_OK) {
1017 Util_CatchBZ2Error(bzerror);
1018 goto cleanup;
1019 }
1020 if (!func) {
1021 PyErr_SetString(PyExc_RuntimeError,
1022 "can't find file.seek method");
1023 goto cleanup;
1024 }
1025 ret = PyObject_CallFunction(func, "(i)", 0);
1026 if (!ret)
1027 goto cleanup;
1028 Py_DECREF(ret);
1029 ret = NULL;
1030 self->pos = 0;
1031 self->fp = BZ2_bzReadOpen(&bzerror,
1032 PyFile_AsFile((PyObject*)self),
1033 0, 0, NULL, 0);
1034 if (bzerror != BZ_OK) {
1035 Util_CatchBZ2Error(bzerror);
1036 goto cleanup;
1037 }
1038 self->mode = MODE_READ;
1039 } else if (self->mode == MODE_READ_EOF) {
1040 goto exit;
1041 }
1042
1043 if (offset == 0)
1044 goto exit;
1045
1046 /* Before getting here, offset must be set to the number of bytes
1047 * to walk forward. */
1048 for (;;) {
1049 if (offset-bytesread > buffersize)
1050 readsize = buffersize;
1051 else
1052 readsize = offset-bytesread;
1053 Py_BEGIN_ALLOW_THREADS
1054 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1055 buffer, readsize, self);
1056 self->pos += chunksize;
1057 Py_END_ALLOW_THREADS
1058 bytesread += chunksize;
1059 if (bzerror == BZ_STREAM_END) {
1060 self->size = self->pos;
1061 self->mode = MODE_READ_EOF;
1062 break;
1063 } else if (bzerror != BZ_OK) {
1064 Util_CatchBZ2Error(bzerror);
1065 goto cleanup;
1066 }
1067 if (bytesread == offset)
1068 break;
1069 }
1070
1071exit:
1072 Py_INCREF(Py_None);
1073 ret = Py_None;
1074
1075cleanup:
1076 RELEASE_LOCK(self);
1077 return ret;
1078}
1079
1080PyDoc_STRVAR(BZ2File_tell__doc__,
1081"tell() -> int\n\
1082\n\
1083Return the current file position, an integer (may be a long integer).\n\
1084");
1085
1086static PyObject *
1087BZ2File_tell(BZ2FileObject *self, PyObject *args)
1088{
1089 PyObject *ret = NULL;
1090
1091 if (self->mode == MODE_CLOSED) {
1092 PyErr_SetString(PyExc_ValueError,
1093 "I/O operation on closed file");
1094 goto cleanup;
1095 }
1096
1097 ret = PyInt_FromLong(self->pos);
1098
1099cleanup:
1100 return ret;
1101}
1102
1103PyDoc_STRVAR(BZ2File_notsup__doc__,
1104"Operation not supported.\n\
1105");
1106
1107static PyObject *
1108BZ2File_notsup(BZ2FileObject *self, PyObject *args)
1109{
1110 PyErr_SetString(PyExc_IOError, "operation not supported");
1111 return NULL;
1112}
1113
1114PyDoc_STRVAR(BZ2File_close__doc__,
1115"close() -> None or (perhaps) an integer\n\
1116\n\
1117Close the file. Sets data attribute .closed to true. A closed file\n\
1118cannot be used for further I/O operations. close() may be called more\n\
1119than once without error.\n\
1120");
1121
1122static PyObject *
1123BZ2File_close(BZ2FileObject *self)
1124{
1125 PyObject *file_close;
1126 PyObject *ret = NULL;
1127 int bzerror = BZ_OK;
1128
1129 ACQUIRE_LOCK(self);
1130 switch (self->mode) {
1131 case MODE_READ:
1132 case MODE_READ_EOF:
1133 BZ2_bzReadClose(&bzerror, self->fp);
1134 break;
1135 case MODE_WRITE:
1136 BZ2_bzWriteClose(&bzerror, self->fp,
1137 0, NULL, NULL);
1138 break;
1139 }
1140 self->mode = MODE_CLOSED;
1141 file_close = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1142 "close");
1143 if (!file_close) {
1144 PyErr_SetString(PyExc_RuntimeError,
1145 "can't find file.close method");
1146 goto cleanup;
1147 }
1148 ret = PyObject_CallObject(file_close, NULL);
1149 if (bzerror != BZ_OK) {
1150 Util_CatchBZ2Error(bzerror);
1151 Py_XDECREF(ret);
1152 ret = NULL;
1153 goto cleanup;
1154 }
1155
1156cleanup:
1157 RELEASE_LOCK(self);
1158 return ret;
1159}
1160
1161static PyMethodDef BZ2File_methods[] = {
1162 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1163 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1164 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1165 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1166 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1167 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1168 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1169 {"truncate", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1170 {"readinto", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1171 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1172 {NULL, NULL} /* sentinel */
1173};
1174
1175
1176/* ===================================================================== */
1177/* Slot definitions for BZ2File_Type. */
1178
1179static int
1180BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1181{
1182 PyObject *file_args = NULL;
1183 static char *kwlist[] = {"filename", "mode", "buffering",
1184 "compresslevel", 0};
1185 char *name = NULL;
1186 char *mode = "r";
1187 int buffering = -1;
1188 int compresslevel = 9;
1189 int bzerror;
1190 int mode_char = 0;
1191 int univ_newline = 0;
1192
1193 self->size = -1;
1194
1195 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "et|sii:BZ2File",
1196 kwlist, Py_FileSystemDefaultEncoding,
1197 &name, &mode, &buffering,
1198 &compresslevel))
1199 return -1;
1200
1201 if (compresslevel < 1 || compresslevel > 9) {
1202 PyErr_SetString(PyExc_ValueError,
1203 "compresslevel must be between 1 and 9");
1204 return -1;
1205 }
1206
1207 for (;;) {
1208 int error = 0;
1209 switch (*mode) {
1210 case 'r':
1211 case 'w':
1212 if (mode_char)
1213 error = 1;
1214 mode_char = *mode;
1215 break;
1216
1217 case 'b':
1218 break;
1219
1220 case 'U':
1221 univ_newline = 1;
1222 break;
1223
1224 default:
1225 error = 1;
1226 }
1227 if (error) {
1228 PyErr_SetString(PyExc_ValueError, "invalid mode");
1229 return -1;
1230 }
1231 mode++;
1232 if (*mode == 0)
1233 break;
1234 }
1235
1236 if (mode_char == 'r')
1237 mode = univ_newline ? "rbU" : "rb";
1238 else
1239 mode = univ_newline ? "wbU" : "wb";
1240
1241 file_args = Py_BuildValue("(ssi)", name, mode, buffering);
1242 if (!file_args)
1243 goto error;
1244
1245 if (PyFile_Type.tp_init((PyObject *)self, file_args, NULL) < 0)
1246 goto error;
1247
1248#ifdef WITH_THREAD
1249 self->lock = PyThread_allocate_lock();
1250 if (!self->lock)
1251 goto error;
1252#endif
1253
1254 if (mode_char == 'r')
1255 self->fp = BZ2_bzReadOpen(&bzerror,
1256 PyFile_AsFile((PyObject*)self),
1257 0, 0, NULL, 0);
1258 else
1259 self->fp = BZ2_bzWriteOpen(&bzerror,
1260 PyFile_AsFile((PyObject*)self),
1261 compresslevel, 0, 0);
1262
1263 if (bzerror != BZ_OK) {
1264 Util_CatchBZ2Error(bzerror);
1265 goto error;
1266 }
1267
1268 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1269
1270 Py_XDECREF(file_args);
1271 PyMem_Free(name);
1272 return 0;
1273
1274error:
1275#ifdef WITH_THREAD
1276 if (self->lock)
1277 PyThread_free_lock(self->lock);
1278#endif
1279 Py_XDECREF(file_args);
1280 PyMem_Free(name);
1281 return -1;
1282}
1283
1284static void
1285BZ2File_dealloc(BZ2FileObject *self)
1286{
1287 int bzerror;
1288#ifdef WITH_THREAD
1289 if (self->lock)
1290 PyThread_free_lock(self->lock);
1291#endif
1292 switch (self->mode) {
1293 case MODE_READ:
1294 case MODE_READ_EOF:
1295 BZ2_bzReadClose(&bzerror, self->fp);
1296 break;
1297 case MODE_WRITE:
1298 BZ2_bzWriteClose(&bzerror, self->fp,
1299 0, NULL, NULL);
1300 break;
1301 }
1302 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1303}
1304
1305/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1306static PyObject *
1307BZ2File_getiter(BZ2FileObject *self)
1308{
1309 if (self->mode == MODE_CLOSED) {
1310 PyErr_SetString(PyExc_ValueError,
1311 "I/O operation on closed file");
1312 return NULL;
1313 }
1314 Py_INCREF((PyObject*)self);
1315 return (PyObject *)self;
1316}
1317
1318/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1319#define READAHEAD_BUFSIZE 8192
1320static PyObject *
1321BZ2File_iternext(BZ2FileObject *self)
1322{
1323 PyStringObject* ret;
1324 ACQUIRE_LOCK(self);
1325 if (self->mode == MODE_CLOSED) {
1326 PyErr_SetString(PyExc_ValueError,
1327 "I/O operation on closed file");
1328 return NULL;
1329 }
1330 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1331 RELEASE_LOCK(self);
1332 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1333 Py_XDECREF(ret);
1334 return NULL;
1335 }
1336 return (PyObject *)ret;
1337}
1338
1339/* ===================================================================== */
1340/* BZ2File_Type definition. */
1341
1342PyDoc_VAR(BZ2File__doc__) =
1343PyDoc_STR(
1344"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1345\n\
1346Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1347writing. When opened for writing, the file will be created if it doesn't\n\
1348exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1349unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1350is given, must be a number between 1 and 9.\n\
1351")
1352#ifdef WITH_UNIVERSAL_NEWLINES
1353PyDoc_STR(
1354"\n\
1355Add a 'U' to mode to open the file for input with universal newline\n\
1356support. Any line ending in the input file will be seen as a '\\n' in\n\
1357Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1358for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1359'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1360newlines are available only when reading.\n\
1361")
1362#endif
1363;
1364
1365statichere PyTypeObject BZ2File_Type = {
1366 PyObject_HEAD_INIT(NULL)
1367 0, /*ob_size*/
1368 "bz2.BZ2File", /*tp_name*/
1369 sizeof(BZ2FileObject), /*tp_basicsize*/
1370 0, /*tp_itemsize*/
1371 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1372 0, /*tp_print*/
1373 0, /*tp_getattr*/
1374 0, /*tp_setattr*/
1375 0, /*tp_compare*/
1376 0, /*tp_repr*/
1377 0, /*tp_as_number*/
1378 0, /*tp_as_sequence*/
1379 0, /*tp_as_mapping*/
1380 0, /*tp_hash*/
1381 0, /*tp_call*/
1382 0, /*tp_str*/
1383 PyObject_GenericGetAttr,/*tp_getattro*/
1384 PyObject_GenericSetAttr,/*tp_setattro*/
1385 0, /*tp_as_buffer*/
1386 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1387 BZ2File__doc__, /*tp_doc*/
1388 0, /*tp_traverse*/
1389 0, /*tp_clear*/
1390 0, /*tp_richcompare*/
1391 0, /*tp_weaklistoffset*/
1392 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1393 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1394 BZ2File_methods, /*tp_methods*/
1395 0, /*tp_members*/
1396 0, /*tp_getset*/
1397 0, /*tp_base*/
1398 0, /*tp_dict*/
1399 0, /*tp_descr_get*/
1400 0, /*tp_descr_set*/
1401 0, /*tp_dictoffset*/
1402 (initproc)BZ2File_init, /*tp_init*/
1403 PyType_GenericAlloc, /*tp_alloc*/
1404 0, /*tp_new*/
1405 _PyObject_Del, /*tp_free*/
1406 0, /*tp_is_gc*/
1407};
1408
1409
1410/* ===================================================================== */
1411/* Methods of BZ2Comp. */
1412
1413PyDoc_STRVAR(BZ2Comp_compress__doc__,
1414"compress(data) -> string\n\
1415\n\
1416Provide more data to the compressor object. It will return chunks of\n\
1417compressed data whenever possible. When you've finished providing data\n\
1418to compress, call the flush() method to finish the compression process,\n\
1419and return what is left in the internal buffers.\n\
1420");
1421
1422static PyObject *
1423BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1424{
1425 char *data;
1426 int datasize;
1427 int bufsize = SMALLCHUNK;
1428 long totalout;
1429 PyObject *ret;
1430 bz_stream *bzs = &self->bzs;
1431 int bzerror;
1432
1433 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1434 return NULL;
1435
1436 ACQUIRE_LOCK(self);
1437 if (!self->running) {
1438 PyErr_SetString(PyExc_ValueError, "this object was already "
1439 "flushed");
1440 goto error;
1441 }
1442
1443 ret = PyString_FromStringAndSize(NULL, bufsize);
1444 if (!ret)
1445 goto error;
1446
1447 bzs->next_in = data;
1448 bzs->avail_in = datasize;
1449 bzs->next_out = BUF(ret);
1450 bzs->avail_out = bufsize;
1451
1452 totalout = BZS_TOTAL_OUT(bzs);
1453
1454 for (;;) {
1455 Py_BEGIN_ALLOW_THREADS
1456 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1457 Py_END_ALLOW_THREADS
1458 if (bzerror != BZ_RUN_OK) {
1459 Util_CatchBZ2Error(bzerror);
1460 goto error;
1461 }
1462 if (bzs->avail_out == 0) {
1463 bufsize = Util_NewBufferSize(bufsize);
1464 if (_PyString_Resize(&ret, bufsize) < 0) {
1465 BZ2_bzCompressEnd(bzs);
1466 goto error;
1467 }
1468 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1469 - totalout);
1470 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1471 } else if (bzs->avail_in == 0) {
1472 break;
1473 }
1474 }
1475
1476 _PyString_Resize(&ret, BZS_TOTAL_OUT(bzs) - totalout);
1477
1478 RELEASE_LOCK(self);
1479 return ret;
1480
1481error:
1482 RELEASE_LOCK(self);
1483 Py_XDECREF(ret);
1484 return NULL;
1485}
1486
1487PyDoc_STRVAR(BZ2Comp_flush__doc__,
1488"flush() -> string\n\
1489\n\
1490Finish the compression process and return what is left in internal buffers.\n\
1491You must not use the compressor object after calling this method.\n\
1492");
1493
1494static PyObject *
1495BZ2Comp_flush(BZ2CompObject *self)
1496{
1497 int bufsize = SMALLCHUNK;
1498 PyObject *ret;
1499 bz_stream *bzs = &self->bzs;
1500 int totalout;
1501 int bzerror;
1502
1503 ACQUIRE_LOCK(self);
1504 if (!self->running) {
1505 PyErr_SetString(PyExc_ValueError, "object was already "
1506 "flushed");
1507 goto error;
1508 }
1509 self->running = 0;
1510
1511 ret = PyString_FromStringAndSize(NULL, bufsize);
1512 if (!ret)
1513 goto error;
1514
1515 bzs->next_out = BUF(ret);
1516 bzs->avail_out = bufsize;
1517
1518 totalout = BZS_TOTAL_OUT(bzs);
1519
1520 for (;;) {
1521 Py_BEGIN_ALLOW_THREADS
1522 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1523 Py_END_ALLOW_THREADS
1524 if (bzerror == BZ_STREAM_END) {
1525 break;
1526 } else if (bzerror != BZ_FINISH_OK) {
1527 Util_CatchBZ2Error(bzerror);
1528 goto error;
1529 }
1530 if (bzs->avail_out == 0) {
1531 bufsize = Util_NewBufferSize(bufsize);
1532 if (_PyString_Resize(&ret, bufsize) < 0)
1533 goto error;
1534 bzs->next_out = BUF(ret);
1535 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1536 - totalout);
1537 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1538 }
1539 }
1540
1541 if (bzs->avail_out != 0)
1542 _PyString_Resize(&ret, BZS_TOTAL_OUT(bzs) - totalout);
1543
1544 RELEASE_LOCK(self);
1545 return ret;
1546
1547error:
1548 RELEASE_LOCK(self);
1549 Py_XDECREF(ret);
1550 return NULL;
1551}
1552
1553static PyMethodDef BZ2Comp_methods[] = {
1554 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS, BZ2Comp_compress__doc__},
1555 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS, BZ2Comp_flush__doc__},
1556 {NULL, NULL} /* sentinel */
1557};
1558
1559
1560/* ===================================================================== */
1561/* Slot definitions for BZ2Comp_Type. */
1562
1563static int
1564BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1565{
1566 int compresslevel = 9;
1567 int bzerror;
1568 static char *kwlist[] = {"compresslevel", 0};
1569
1570 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1571 kwlist, &compresslevel))
1572 return -1;
1573
1574 if (compresslevel < 1 || compresslevel > 9) {
1575 PyErr_SetString(PyExc_ValueError,
1576 "compresslevel must be between 1 and 9");
1577 goto error;
1578 }
1579
1580#ifdef WITH_THREAD
1581 self->lock = PyThread_allocate_lock();
1582 if (!self->lock)
1583 goto error;
1584#endif
1585
1586 memset(&self->bzs, 0, sizeof(bz_stream));
1587 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1588 if (bzerror != BZ_OK) {
1589 Util_CatchBZ2Error(bzerror);
1590 goto error;
1591 }
1592
1593 self->running = 1;
1594
1595 return 0;
1596error:
1597#ifdef WITH_THREAD
1598 if (self->lock)
1599 PyThread_free_lock(self->lock);
1600#endif
1601 return -1;
1602}
1603
1604static void
1605BZ2Comp_dealloc(BZ2CompObject *self)
1606{
1607#ifdef WITH_THREAD
1608 if (self->lock)
1609 PyThread_free_lock(self->lock);
1610#endif
1611 BZ2_bzCompressEnd(&self->bzs);
1612 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1613}
1614
1615
1616/* ===================================================================== */
1617/* BZ2Comp_Type definition. */
1618
1619PyDoc_STRVAR(BZ2Comp__doc__,
1620"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1621\n\
1622Create a new compressor object. This object may be used to compress\n\
1623data sequentially. If you want to compress data in one shot, use the\n\
1624compress() function instead. The compresslevel parameter, if given,\n\
1625must be a number between 1 and 9.\n\
1626");
1627
1628statichere PyTypeObject BZ2Comp_Type = {
1629 PyObject_HEAD_INIT(NULL)
1630 0, /*ob_size*/
1631 "bz2.BZ2Compressor", /*tp_name*/
1632 sizeof(BZ2CompObject), /*tp_basicsize*/
1633 0, /*tp_itemsize*/
1634 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1635 0, /*tp_print*/
1636 0, /*tp_getattr*/
1637 0, /*tp_setattr*/
1638 0, /*tp_compare*/
1639 0, /*tp_repr*/
1640 0, /*tp_as_number*/
1641 0, /*tp_as_sequence*/
1642 0, /*tp_as_mapping*/
1643 0, /*tp_hash*/
1644 0, /*tp_call*/
1645 0, /*tp_str*/
1646 PyObject_GenericGetAttr,/*tp_getattro*/
1647 PyObject_GenericSetAttr,/*tp_setattro*/
1648 0, /*tp_as_buffer*/
1649 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1650 BZ2Comp__doc__, /*tp_doc*/
1651 0, /*tp_traverse*/
1652 0, /*tp_clear*/
1653 0, /*tp_richcompare*/
1654 0, /*tp_weaklistoffset*/
1655 0, /*tp_iter*/
1656 0, /*tp_iternext*/
1657 BZ2Comp_methods, /*tp_methods*/
1658 0, /*tp_members*/
1659 0, /*tp_getset*/
1660 0, /*tp_base*/
1661 0, /*tp_dict*/
1662 0, /*tp_descr_get*/
1663 0, /*tp_descr_set*/
1664 0, /*tp_dictoffset*/
1665 (initproc)BZ2Comp_init, /*tp_init*/
1666 PyType_GenericAlloc, /*tp_alloc*/
1667 PyType_GenericNew, /*tp_new*/
1668 _PyObject_Del, /*tp_free*/
1669 0, /*tp_is_gc*/
1670};
1671
1672
1673/* ===================================================================== */
1674/* Members of BZ2Decomp. */
1675
1676#define OFF(x) offsetof(BZ2DecompObject, x)
1677
1678static PyMemberDef BZ2Decomp_members[] = {
1679 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1680 {NULL} /* Sentinel */
1681};
1682
1683
1684/* ===================================================================== */
1685/* Methods of BZ2Decomp. */
1686
1687PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1688"decompress(data) -> string\n\
1689\n\
1690Provide more data to the decompressor object. It will return chunks\n\
1691of decompressed data whenever possible. If you try to decompress data\n\
1692after the end of stream is found, EOFError will be raised. If any data\n\
1693was found after the end of stream, it'll be ignored and saved in\n\
1694unused_data attribute.\n\
1695");
1696
1697static PyObject *
1698BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1699{
1700 char *data;
1701 int datasize;
1702 int bufsize = SMALLCHUNK;
1703 long totalout;
1704 PyObject *ret;
1705 bz_stream *bzs = &self->bzs;
1706 int bzerror;
1707
1708 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1709 return NULL;
1710
1711 ACQUIRE_LOCK(self);
1712 if (!self->running) {
1713 PyErr_SetString(PyExc_EOFError, "end of stream was "
1714 "already found");
1715 goto error;
1716 }
1717
1718 ret = PyString_FromStringAndSize(NULL, bufsize);
1719 if (!ret)
1720 goto error;
1721
1722 bzs->next_in = data;
1723 bzs->avail_in = datasize;
1724 bzs->next_out = BUF(ret);
1725 bzs->avail_out = bufsize;
1726
1727 totalout = BZS_TOTAL_OUT(bzs);
1728
1729 for (;;) {
1730 Py_BEGIN_ALLOW_THREADS
1731 bzerror = BZ2_bzDecompress(bzs);
1732 Py_END_ALLOW_THREADS
1733 if (bzerror == BZ_STREAM_END) {
1734 if (bzs->avail_in != 0) {
1735 Py_DECREF(self->unused_data);
1736 self->unused_data =
1737 PyString_FromStringAndSize(bzs->next_in,
1738 bzs->avail_in);
1739 }
1740 self->running = 0;
1741 break;
1742 }
1743 if (bzerror != BZ_OK) {
1744 Util_CatchBZ2Error(bzerror);
1745 goto error;
1746 }
1747 if (bzs->avail_out == 0) {
1748 bufsize = Util_NewBufferSize(bufsize);
1749 if (_PyString_Resize(&ret, bufsize) < 0) {
1750 BZ2_bzDecompressEnd(bzs);
1751 goto error;
1752 }
1753 bzs->next_out = BUF(ret);
1754 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1755 - totalout);
1756 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1757 } else if (bzs->avail_in == 0) {
1758 break;
1759 }
1760 }
1761
1762 if (bzs->avail_out != 0)
1763 _PyString_Resize(&ret, BZS_TOTAL_OUT(bzs) - totalout);
1764
1765 RELEASE_LOCK(self);
1766 return ret;
1767
1768error:
1769 RELEASE_LOCK(self);
1770 Py_XDECREF(ret);
1771 return NULL;
1772}
1773
1774static PyMethodDef BZ2Decomp_methods[] = {
1775 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1776 {NULL, NULL} /* sentinel */
1777};
1778
1779
1780/* ===================================================================== */
1781/* Slot definitions for BZ2Decomp_Type. */
1782
1783static int
1784BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1785{
1786 int bzerror;
1787
1788 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1789 return -1;
1790
1791#ifdef WITH_THREAD
1792 self->lock = PyThread_allocate_lock();
1793 if (!self->lock)
1794 goto error;
1795#endif
1796
1797 self->unused_data = PyString_FromString("");
1798 if (!self->unused_data)
1799 goto error;
1800
1801 memset(&self->bzs, 0, sizeof(bz_stream));
1802 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1803 if (bzerror != BZ_OK) {
1804 Util_CatchBZ2Error(bzerror);
1805 goto error;
1806 }
1807
1808 self->running = 1;
1809
1810 return 0;
1811
1812error:
1813#ifdef WITH_THREAD
1814 if (self->lock)
1815 PyThread_free_lock(self->lock);
1816#endif
1817 Py_XDECREF(self->unused_data);
1818 return -1;
1819}
1820
1821static void
1822BZ2Decomp_dealloc(BZ2DecompObject *self)
1823{
1824#ifdef WITH_THREAD
1825 if (self->lock)
1826 PyThread_free_lock(self->lock);
1827#endif
1828 Py_XDECREF(self->unused_data);
1829 BZ2_bzDecompressEnd(&self->bzs);
1830 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1831}
1832
1833
1834/* ===================================================================== */
1835/* BZ2Decomp_Type definition. */
1836
1837PyDoc_STRVAR(BZ2Decomp__doc__,
1838"BZ2Decompressor() -> decompressor object\n\
1839\n\
1840Create a new decompressor object. This object may be used to decompress\n\
1841data sequentially. If you want to decompress data in one shot, use the\n\
1842decompress() function instead.\n\
1843");
1844
1845statichere PyTypeObject BZ2Decomp_Type = {
1846 PyObject_HEAD_INIT(NULL)
1847 0, /*ob_size*/
1848 "bz2.BZ2Decompressor", /*tp_name*/
1849 sizeof(BZ2DecompObject), /*tp_basicsize*/
1850 0, /*tp_itemsize*/
1851 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1852 0, /*tp_print*/
1853 0, /*tp_getattr*/
1854 0, /*tp_setattr*/
1855 0, /*tp_compare*/
1856 0, /*tp_repr*/
1857 0, /*tp_as_number*/
1858 0, /*tp_as_sequence*/
1859 0, /*tp_as_mapping*/
1860 0, /*tp_hash*/
1861 0, /*tp_call*/
1862 0, /*tp_str*/
1863 PyObject_GenericGetAttr,/*tp_getattro*/
1864 PyObject_GenericSetAttr,/*tp_setattro*/
1865 0, /*tp_as_buffer*/
1866 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1867 BZ2Decomp__doc__, /*tp_doc*/
1868 0, /*tp_traverse*/
1869 0, /*tp_clear*/
1870 0, /*tp_richcompare*/
1871 0, /*tp_weaklistoffset*/
1872 0, /*tp_iter*/
1873 0, /*tp_iternext*/
1874 BZ2Decomp_methods, /*tp_methods*/
1875 BZ2Decomp_members, /*tp_members*/
1876 0, /*tp_getset*/
1877 0, /*tp_base*/
1878 0, /*tp_dict*/
1879 0, /*tp_descr_get*/
1880 0, /*tp_descr_set*/
1881 0, /*tp_dictoffset*/
1882 (initproc)BZ2Decomp_init, /*tp_init*/
1883 PyType_GenericAlloc, /*tp_alloc*/
1884 PyType_GenericNew, /*tp_new*/
1885 _PyObject_Del, /*tp_free*/
1886 0, /*tp_is_gc*/
1887};
1888
1889
1890/* ===================================================================== */
1891/* Module functions. */
1892
1893PyDoc_STRVAR(bz2_compress__doc__,
1894"compress(data [, compresslevel=9]) -> string\n\
1895\n\
1896Compress data in one shot. If you want to compress data sequentially,\n\
1897use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1898given, must be a number between 1 and 9.\n\
1899");
1900
1901static PyObject *
1902bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1903{
1904 int compresslevel=9;
1905 char *data;
1906 int datasize;
1907 int bufsize;
1908 PyObject *ret;
1909 bz_stream _bzs;
1910 bz_stream *bzs = &_bzs;
1911 int bzerror;
1912 static char *kwlist[] = {"data", "compresslevel", 0};
1913
1914 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1915 kwlist, &data, &datasize,
1916 &compresslevel))
1917 return NULL;
1918
1919 if (compresslevel < 1 || compresslevel > 9) {
1920 PyErr_SetString(PyExc_ValueError,
1921 "compresslevel must be between 1 and 9");
1922 return NULL;
1923 }
1924
1925 /* Conforming to bz2 manual, this is large enough to fit compressed
1926 * data in one shot. We will check it later anyway. */
1927 bufsize = datasize + (datasize/100+1) + 600;
1928
1929 ret = PyString_FromStringAndSize(NULL, bufsize);
1930 if (!ret)
1931 return NULL;
1932
1933 memset(bzs, 0, sizeof(bz_stream));
1934
1935 bzs->next_in = data;
1936 bzs->avail_in = datasize;
1937 bzs->next_out = BUF(ret);
1938 bzs->avail_out = bufsize;
1939
1940 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1941 if (bzerror != BZ_OK) {
1942 Util_CatchBZ2Error(bzerror);
1943 Py_DECREF(ret);
1944 return NULL;
1945 }
1946
1947 for (;;) {
1948 Py_BEGIN_ALLOW_THREADS
1949 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1950 Py_END_ALLOW_THREADS
1951 if (bzerror == BZ_STREAM_END) {
1952 break;
1953 } else if (bzerror != BZ_FINISH_OK) {
1954 BZ2_bzCompressEnd(bzs);
1955 Util_CatchBZ2Error(bzerror);
1956 Py_DECREF(ret);
1957 return NULL;
1958 }
1959 if (bzs->avail_out == 0) {
1960 bufsize = Util_NewBufferSize(bufsize);
1961 if (_PyString_Resize(&ret, bufsize) < 0) {
1962 BZ2_bzCompressEnd(bzs);
1963 Py_DECREF(ret);
1964 return NULL;
1965 }
1966 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1967 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1968 }
1969 }
1970
1971 if (bzs->avail_out != 0)
1972 _PyString_Resize(&ret, BZS_TOTAL_OUT(bzs));
1973 BZ2_bzCompressEnd(bzs);
1974
1975 return ret;
1976}
1977
1978PyDoc_STRVAR(bz2_decompress__doc__,
1979"decompress(data) -> decompressed data\n\
1980\n\
1981Decompress data in one shot. If you want to decompress data sequentially,\n\
1982use an instance of BZ2Decompressor instead.\n\
1983");
1984
1985static PyObject *
1986bz2_decompress(PyObject *self, PyObject *args)
1987{
1988 char *data;
1989 int datasize;
1990 int bufsize = SMALLCHUNK;
1991 PyObject *ret;
1992 bz_stream _bzs;
1993 bz_stream *bzs = &_bzs;
1994 int bzerror;
1995
1996 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1997 return NULL;
1998
1999 if (datasize == 0)
2000 return PyString_FromString("");
2001
2002 ret = PyString_FromStringAndSize(NULL, bufsize);
2003 if (!ret)
2004 return NULL;
2005
2006 memset(bzs, 0, sizeof(bz_stream));
2007
2008 bzs->next_in = data;
2009 bzs->avail_in = datasize;
2010 bzs->next_out = BUF(ret);
2011 bzs->avail_out = bufsize;
2012
2013 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2014 if (bzerror != BZ_OK) {
2015 Util_CatchBZ2Error(bzerror);
2016 Py_DECREF(ret);
2017 return NULL;
2018 }
2019
2020 for (;;) {
2021 Py_BEGIN_ALLOW_THREADS
2022 bzerror = BZ2_bzDecompress(bzs);
2023 Py_END_ALLOW_THREADS
2024 if (bzerror == BZ_STREAM_END) {
2025 break;
2026 } else if (bzerror != BZ_OK) {
2027 BZ2_bzDecompressEnd(bzs);
2028 Util_CatchBZ2Error(bzerror);
2029 Py_DECREF(ret);
2030 return NULL;
2031 }
2032 if (bzs->avail_out == 0) {
2033 bufsize = Util_NewBufferSize(bufsize);
2034 if (_PyString_Resize(&ret, bufsize) < 0) {
2035 BZ2_bzDecompressEnd(bzs);
2036 Py_DECREF(ret);
2037 return NULL;
2038 }
2039 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2040 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2041 } else if (bzs->avail_in == 0) {
2042 BZ2_bzDecompressEnd(bzs);
2043 PyErr_SetString(PyExc_ValueError,
2044 "couldn't find end of stream");
2045 Py_DECREF(ret);
2046 return NULL;
2047 }
2048 }
2049
2050 if (bzs->avail_out != 0)
2051 _PyString_Resize(&ret, BZS_TOTAL_OUT(bzs));
2052 BZ2_bzDecompressEnd(bzs);
2053
2054 return ret;
2055}
2056
2057static PyMethodDef bz2_methods[] = {
2058 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2059 bz2_compress__doc__},
2060 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2061 bz2_decompress__doc__},
2062 {NULL, NULL} /* sentinel */
2063};
2064
2065/* ===================================================================== */
2066/* Initialization function. */
2067
2068PyDoc_STRVAR(bz2__doc__,
2069"The python bz2 module provides a comprehensive interface for\n\
2070the bz2 compression library. It implements a complete file\n\
2071interface, one shot (de)compression functions, and types for\n\
2072sequential (de)compression.\n\
2073");
2074
2075DL_EXPORT(void)
2076initbz2(void)
2077{
2078 PyObject *m;
2079
2080 BZ2File_Type.ob_type = &PyType_Type;
2081 BZ2File_Type.tp_base = &PyFile_Type;
2082 BZ2File_Type.tp_new = PyFile_Type.tp_new;
2083
2084 BZ2Comp_Type.ob_type = &PyType_Type;
2085 BZ2Decomp_Type.ob_type = &PyType_Type;
2086
2087 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2088
2089 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2090
2091 Py_INCREF(&BZ2File_Type);
2092 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2093
2094 Py_INCREF(&BZ2Comp_Type);
2095 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2096
2097 Py_INCREF(&BZ2Decomp_Type);
2098 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2099}