blob: 114070fab1044514aae50b799e7a785f44d2b46c [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Credits text for the module, kept as a single NUL-terminated string.
 * NOTE(review): where this is exported is not visible in this part of the
 * file. */
static char __author__[] =
	"The bz2 python module was written by:\n"
	"\n"
	" Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Character buffer of a Python string object. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode.  MODE_READ_EOF is entered once a
 * read has reported BZ_STREAM_END. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

#define BZ2FileObject_Check(v)	((v)->ob_type == &BZ2File_Type)

/* Combine the two 32-bit halves of a bz_stream's output counter into the
 * widest integer type available.  Every variant expands to a fully
 * parenthesized expression (no trailing semicolon) so the macro can be
 * used inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit type: only the low 32 bits can be reported. */
#define BZS_TOTAL_OUT(bzs) \
	((bzs)->total_out_lo32)
#endif

/* Per-object locking; both macros expand to nothing in builds without
 * thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

#ifdef WITH_UNIVERSAL_NEWLINES
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN	0	/* No newline seen, yet */
#define NEWLINE_CR 1		/* \r newline seen */
#define NEWLINE_LF 2		/* \n newline seen */
#define NEWLINE_CRLF 4		/* \r\n newline seen */
#endif
60
61/* ===================================================================== */
62/* Structure definitions. */
63
64typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000065 PyObject_HEAD
66 PyObject *file;
67
68 char* f_buf; /* Allocated readahead buffer */
69 char* f_bufend; /* Points after last occupied position */
70 char* f_bufptr; /* Current buffer position */
71
72 int f_softspace; /* Flag used by 'print' command */
73
74#ifdef WITH_UNIVERSAL_NEWLINES
75 int f_univ_newline; /* Handle any newline convention */
76 int f_newlinetypes; /* Types of newlines seen */
77 int f_skipnextlf; /* Skip next \n */
78#endif
79
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000080 BZFILE *fp;
81 int mode;
82 long pos;
83 long size;
84#ifdef WITH_THREAD
85 PyThread_type_lock lock;
86#endif
87} BZ2FileObject;
88
89typedef struct {
90 PyObject_HEAD
91 bz_stream bzs;
92 int running;
93#ifdef WITH_THREAD
94 PyThread_type_lock lock;
95#endif
96} BZ2CompObject;
97
98typedef struct {
99 PyObject_HEAD
100 bz_stream bzs;
101 int running;
102 PyObject *unused_data;
103#ifdef WITH_THREAD
104 PyThread_type_lock lock;
105#endif
106} BZ2DecompObject;
107
108/* ===================================================================== */
109/* Utility functions. */
110
111static int
112Util_CatchBZ2Error(int bzerror)
113{
114 int ret = 0;
115 switch(bzerror) {
116 case BZ_OK:
117 case BZ_STREAM_END:
118 break;
119
120 case BZ_CONFIG_ERROR:
121 PyErr_SetString(PyExc_SystemError,
122 "the bz2 library was not compiled "
123 "correctly");
124 ret = 1;
125 break;
Tim Peterse3228092002-11-09 04:21:44 +0000126
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000127 case BZ_PARAM_ERROR:
128 PyErr_SetString(PyExc_ValueError,
129 "the bz2 library has received wrong "
130 "parameters");
131 ret = 1;
132 break;
Tim Peterse3228092002-11-09 04:21:44 +0000133
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000134 case BZ_MEM_ERROR:
135 PyErr_NoMemory();
136 ret = 1;
137 break;
138
139 case BZ_DATA_ERROR:
140 case BZ_DATA_ERROR_MAGIC:
141 PyErr_SetString(PyExc_IOError, "invalid data stream");
142 ret = 1;
143 break;
144
145 case BZ_IO_ERROR:
146 PyErr_SetString(PyExc_IOError, "unknown IO error");
147 ret = 1;
148 break;
149
150 case BZ_UNEXPECTED_EOF:
151 PyErr_SetString(PyExc_EOFError,
152 "compressed file ended before the "
153 "logical end-of-stream was detected");
154 ret = 1;
155 break;
156
157 case BZ_SEQUENCE_ERROR:
158 PyErr_SetString(PyExc_RuntimeError,
159 "wrong sequence of bz2 library "
160 "commands used");
161 ret = 1;
162 break;
163 }
164 return ret;
165}
166
/* Growth step for small buffers: at least 8192 bytes, or BUFSIZ when the
 * C library's stdio buffer is larger. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Growth step for large buffers; smaller when int is narrower than
 * 4 bytes. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* Derived from Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK, up to and including BIGCHUNK: double;
 *   - above both: add BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	if (currentsize <= SMALLCHUNK)
		return currentsize + SMALLCHUNK;
	return currentsize + (currentsize <= BIGCHUNK ? currentsize
						      : (size_t)BIGCHUNK);
}
193
194/* This is a hacked version of Python's fileobject.c:get_line(). */
195static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000196Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000197{
198 char c;
199 char *buf, *end;
200 size_t total_v_size; /* total # of slots in buffer */
201 size_t used_v_size; /* # used slots in buffer */
202 size_t increment; /* amount to increment the buffer */
203 PyObject *v;
204 int bzerror;
205#ifdef WITH_UNIVERSAL_NEWLINES
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000206 int newlinetypes = f->f_newlinetypes;
207 int skipnextlf = f->f_skipnextlf;
208 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000209#endif
210
211 total_v_size = n > 0 ? n : 100;
212 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
213 if (v == NULL)
214 return NULL;
215
216 buf = BUF(v);
217 end = buf + total_v_size;
218
219 for (;;) {
220 Py_BEGIN_ALLOW_THREADS
221#ifdef WITH_UNIVERSAL_NEWLINES
222 if (univ_newline) {
223 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000224 BZ2_bzRead(&bzerror, f->fp, &c, 1);
225 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000226 if (bzerror != BZ_OK || buf == end)
227 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000228 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000229 skipnextlf = 0;
230 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000231 /* Seeing a \n here with
232 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000233 * saw a \r before.
234 */
235 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000236 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000237 &c, 1);
238 if (bzerror != BZ_OK)
239 break;
240 } else {
241 newlinetypes |= NEWLINE_CR;
242 }
243 }
244 if (c == '\r') {
245 skipnextlf = 1;
246 c = '\n';
247 } else if ( c == '\n')
248 newlinetypes |= NEWLINE_LF;
249 *buf++ = c;
250 if (c == '\n') break;
251 }
252 if (bzerror == BZ_STREAM_END && skipnextlf)
253 newlinetypes |= NEWLINE_CR;
254 } else /* If not universal newlines use the normal loop */
255#endif
256 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000257 BZ2_bzRead(&bzerror, f->fp, &c, 1);
258 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 *buf++ = c;
260 } while (bzerror == BZ_OK && c != '\n' && buf != end);
261 Py_END_ALLOW_THREADS
262#ifdef WITH_UNIVERSAL_NEWLINES
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000263 f->f_newlinetypes = newlinetypes;
264 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000265#endif
266 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000267 f->size = f->pos;
268 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000269 break;
270 } else if (bzerror != BZ_OK) {
271 Util_CatchBZ2Error(bzerror);
272 Py_DECREF(v);
273 return NULL;
274 }
275 if (c == '\n')
276 break;
277 /* Must be because buf == end */
278 if (n > 0)
279 break;
280 used_v_size = total_v_size;
281 increment = total_v_size >> 2; /* mild exponential growth */
282 total_v_size += increment;
283 if (total_v_size > INT_MAX) {
284 PyErr_SetString(PyExc_OverflowError,
285 "line is longer than a Python string can hold");
286 Py_DECREF(v);
287 return NULL;
288 }
289 if (_PyString_Resize(&v, total_v_size) < 0)
290 return NULL;
291 buf = BUF(v) + used_v_size;
292 end = BUF(v) + total_v_size;
293 }
294
295 used_v_size = buf - BUF(v);
296 if (used_v_size != total_v_size)
297 _PyString_Resize(&v, used_v_size);
298 return v;
299}
300
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* Derived from Python's fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n bytes from stream into buf.  When f->f_univ_newline is
 * set, "\r" and "\r\n" are stored as "\n" and the newline kinds seen are
 * recorded in f->f_newlinetypes; otherwise this is a plain BZ2_bzRead().
 *
 * Returns the number of bytes stored in buf.  *bzerror always receives a
 * status: it is BZ_OK when n is 0, otherwise the status of the last
 * BZ2_bzRead() call.  Once a read reports anything other than BZ_OK the
 * function stops, so BZ2_bzRead() is never called again after
 * BZ_STREAM_END even if swallowed "\n" bytes left room in the buffer. */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
		     char* buf, size_t n, BZ2FileObject *f)
{
	char *dst = buf;
	int newlinetypes, skipnextlf;

	assert(buf != NULL);
	assert(stream != NULL);

	if (!f->f_univ_newline)
		return BZ2_bzRead(bzerror, stream, buf, n);

	/* The loop below does not run for n == 0; make sure the caller
	   still gets a defined status. */
	*bzerror = BZ_OK;

	newlinetypes = f->f_newlinetypes;
	skipnextlf = f->f_skipnextlf;

	/* Invariant:  n is the number of bytes remaining to be filled
	 * in the buffer.
	 */
	while (n) {
		size_t nread;
		int shortread;
		char *src = dst;

		nread = BZ2_bzRead(bzerror, stream, dst, n);
		assert(nread <= n);
		n -= nread; /* assuming 1 byte out for each in; will adjust */
		shortread = n != 0;	/* true iff EOF or error */
		while (nread--) {
			char c = *src++;
			if (c == '\r') {
				/* Save as LF and set flag to skip next LF. */
				*dst++ = '\n';
				skipnextlf = 1;
			}
			else if (skipnextlf && c == '\n') {
				/* Skip LF, and remember we saw CR LF. */
				skipnextlf = 0;
				newlinetypes |= NEWLINE_CRLF;
				++n;
			}
			else {
				/* Normal char to be stored in buffer.  Also
				 * update the newlinetypes flag if either this
				 * is an LF or the previous char was a CR.
				 */
				if (c == '\n')
					newlinetypes |= NEWLINE_LF;
				else if (skipnextlf)
					newlinetypes |= NEWLINE_CR;
				*dst++ = c;
				skipnextlf = 0;
			}
		}
		if (shortread || *bzerror != BZ_OK) {
			/* If this is EOF, update type flags. */
			if (skipnextlf && *bzerror == BZ_STREAM_END)
				newlinetypes |= NEWLINE_CR;
			break;
		}
	}
	f->f_newlinetypes = newlinetypes;
	f->f_skipnextlf = skipnextlf;
	return dst - buf;
}
#endif
372
373/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
374static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000375Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000376{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000377 if (f->f_buf != NULL) {
378 PyMem_Free(f->f_buf);
379 f->f_buf = NULL;
380 }
381}
382
383/* This is a hacked version of Python's fileobject.c:readahead(). */
384static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000385Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000386{
387 int chunksize;
388 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000389
390 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000391 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000392 return 0;
393 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000394 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000395 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000396 if (f->mode == MODE_READ_EOF) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000397 return -1;
398 }
399 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
400 return -1;
401 }
402 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000403 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
404 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000405 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000406 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000407 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000408 f->size = f->pos;
409 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000410 } else if (bzerror != BZ_OK) {
411 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000412 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000413 return -1;
414 }
415 f->f_bufptr = f->f_buf;
416 f->f_bufend = f->f_buf + chunksize;
417 return 0;
418}
419
420/* This is a hacked version of Python's
421 * fileobject.c:readahead_get_line_skip(). */
422static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000424{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000425 PyStringObject* s;
426 char *bufptr;
427 char *buf;
428 int len;
429
430 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 return NULL;
433
434 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000435 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000436 return (PyStringObject *)
437 PyString_FromStringAndSize(NULL, skip);
438 bufptr = memchr(f->f_bufptr, '\n', len);
439 if (bufptr != NULL) {
440 bufptr++; /* Count the '\n' */
441 len = bufptr - f->f_bufptr;
442 s = (PyStringObject *)
443 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000444 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000445 return NULL;
446 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
447 f->f_bufptr = bufptr;
448 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000449 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 } else {
451 bufptr = f->f_bufptr;
452 buf = f->f_buf;
453 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000454 s = Util_ReadAheadGetLineSkip(f, skip+len,
455 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000456 if (s == NULL) {
457 PyMem_Free(buf);
458 return NULL;
459 }
460 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
461 PyMem_Free(buf);
462 }
463 return s;
464}
465
466/* ===================================================================== */
467/* Methods of BZ2File. */
468
469PyDoc_STRVAR(BZ2File_read__doc__,
470"read([size]) -> string\n\
471\n\
472Read at most size uncompressed bytes, returned as a string. If the size\n\
473argument is negative or omitted, read until EOF is reached.\n\
474");
475
476/* This is a hacked version of Python's fileobject.c:file_read(). */
477static PyObject *
478BZ2File_read(BZ2FileObject *self, PyObject *args)
479{
480 long bytesrequested = -1;
481 size_t bytesread, buffersize, chunksize;
482 int bzerror;
483 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000484
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000485 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
486 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000487
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000488 ACQUIRE_LOCK(self);
489 switch (self->mode) {
490 case MODE_READ:
491 break;
492 case MODE_READ_EOF:
493 ret = PyString_FromString("");
494 goto cleanup;
495 case MODE_CLOSED:
496 PyErr_SetString(PyExc_ValueError,
497 "I/O operation on closed file");
498 goto cleanup;
499 default:
500 PyErr_SetString(PyExc_IOError,
501 "file is not ready for reading");
502 goto cleanup;
503 }
504
505 if (bytesrequested < 0)
506 buffersize = Util_NewBufferSize((size_t)0);
507 else
508 buffersize = bytesrequested;
509 if (buffersize > INT_MAX) {
510 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000511 "requested number of bytes is "
512 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 goto cleanup;
514 }
515 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
516 if (ret == NULL)
517 goto cleanup;
518 bytesread = 0;
519
520 for (;;) {
521 Py_BEGIN_ALLOW_THREADS
522 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
523 BUF(ret)+bytesread,
524 buffersize-bytesread,
525 self);
526 self->pos += chunksize;
527 Py_END_ALLOW_THREADS
528 bytesread += chunksize;
529 if (bzerror == BZ_STREAM_END) {
530 self->size = self->pos;
531 self->mode = MODE_READ_EOF;
532 break;
533 } else if (bzerror != BZ_OK) {
534 Util_CatchBZ2Error(bzerror);
535 Py_DECREF(ret);
536 ret = NULL;
537 goto cleanup;
538 }
539 if (bytesrequested < 0) {
540 buffersize = Util_NewBufferSize(buffersize);
541 if (_PyString_Resize(&ret, buffersize) < 0)
542 goto cleanup;
543 } else {
544 break;
545 }
546 }
547 if (bytesread != buffersize)
548 _PyString_Resize(&ret, bytesread);
549
550cleanup:
551 RELEASE_LOCK(self);
552 return ret;
553}
554
555PyDoc_STRVAR(BZ2File_readline__doc__,
556"readline([size]) -> string\n\
557\n\
558Return the next line from the file, as a string, retaining newline.\n\
559A non-negative size argument will limit the maximum number of bytes to\n\
560return (an incomplete line may be returned then). Return an empty\n\
561string at EOF.\n\
562");
563
564static PyObject *
565BZ2File_readline(BZ2FileObject *self, PyObject *args)
566{
567 PyObject *ret = NULL;
568 int sizehint = -1;
569
570 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
571 return NULL;
572
573 ACQUIRE_LOCK(self);
574 switch (self->mode) {
575 case MODE_READ:
576 break;
577 case MODE_READ_EOF:
578 ret = PyString_FromString("");
579 goto cleanup;
580 case MODE_CLOSED:
581 PyErr_SetString(PyExc_ValueError,
582 "I/O operation on closed file");
583 goto cleanup;
584 default:
585 PyErr_SetString(PyExc_IOError,
586 "file is not ready for reading");
587 goto cleanup;
588 }
589
590 if (sizehint == 0)
591 ret = PyString_FromString("");
592 else
593 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
594
595cleanup:
596 RELEASE_LOCK(self);
597 return ret;
598}
599
600PyDoc_STRVAR(BZ2File_readlines__doc__,
601"readlines([size]) -> list\n\
602\n\
603Call readline() repeatedly and return a list of lines read.\n\
604The optional size argument, if given, is an approximate bound on the\n\
605total number of bytes in the lines returned.\n\
606");
607
608/* This is a hacked version of Python's fileobject.c:file_readlines(). */
609static PyObject *
610BZ2File_readlines(BZ2FileObject *self, PyObject *args)
611{
612 long sizehint = 0;
613 PyObject *list = NULL;
614 PyObject *line;
615 char small_buffer[SMALLCHUNK];
616 char *buffer = small_buffer;
617 size_t buffersize = SMALLCHUNK;
618 PyObject *big_buffer = NULL;
619 size_t nfilled = 0;
620 size_t nread;
621 size_t totalread = 0;
622 char *p, *q, *end;
623 int err;
624 int shortread = 0;
625 int bzerror;
626
627 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
628 return NULL;
629
630 ACQUIRE_LOCK(self);
631 switch (self->mode) {
632 case MODE_READ:
633 break;
634 case MODE_READ_EOF:
635 list = PyList_New(0);
636 goto cleanup;
637 case MODE_CLOSED:
638 PyErr_SetString(PyExc_ValueError,
639 "I/O operation on closed file");
640 goto cleanup;
641 default:
642 PyErr_SetString(PyExc_IOError,
643 "file is not ready for reading");
644 goto cleanup;
645 }
646
647 if ((list = PyList_New(0)) == NULL)
648 goto cleanup;
649
650 for (;;) {
651 Py_BEGIN_ALLOW_THREADS
652 nread = Util_UnivNewlineRead(&bzerror, self->fp,
653 buffer+nfilled,
654 buffersize-nfilled, self);
655 self->pos += nread;
656 Py_END_ALLOW_THREADS
657 if (bzerror == BZ_STREAM_END) {
658 self->size = self->pos;
659 self->mode = MODE_READ_EOF;
660 if (nread == 0) {
661 sizehint = 0;
662 break;
663 }
664 shortread = 1;
665 } else if (bzerror != BZ_OK) {
666 Util_CatchBZ2Error(bzerror);
667 error:
668 Py_DECREF(list);
669 list = NULL;
670 goto cleanup;
671 }
672 totalread += nread;
673 p = memchr(buffer+nfilled, '\n', nread);
674 if (p == NULL) {
675 /* Need a larger buffer to fit this line */
676 nfilled += nread;
677 buffersize *= 2;
678 if (buffersize > INT_MAX) {
679 PyErr_SetString(PyExc_OverflowError,
680 "line is longer than a Python string can hold");
681 goto error;
682 }
683 if (big_buffer == NULL) {
684 /* Create the big buffer */
685 big_buffer = PyString_FromStringAndSize(
686 NULL, buffersize);
687 if (big_buffer == NULL)
688 goto error;
689 buffer = PyString_AS_STRING(big_buffer);
690 memcpy(buffer, small_buffer, nfilled);
691 }
692 else {
693 /* Grow the big buffer */
694 _PyString_Resize(&big_buffer, buffersize);
695 buffer = PyString_AS_STRING(big_buffer);
696 }
697 continue;
698 }
699 end = buffer+nfilled+nread;
700 q = buffer;
701 do {
702 /* Process complete lines */
703 p++;
704 line = PyString_FromStringAndSize(q, p-q);
705 if (line == NULL)
706 goto error;
707 err = PyList_Append(list, line);
708 Py_DECREF(line);
709 if (err != 0)
710 goto error;
711 q = p;
712 p = memchr(q, '\n', end-q);
713 } while (p != NULL);
714 /* Move the remaining incomplete line to the start */
715 nfilled = end-q;
716 memmove(buffer, q, nfilled);
717 if (sizehint > 0)
718 if (totalread >= (size_t)sizehint)
719 break;
720 if (shortread) {
721 sizehint = 0;
722 break;
723 }
724 }
725 if (nfilled != 0) {
726 /* Partial last line */
727 line = PyString_FromStringAndSize(buffer, nfilled);
728 if (line == NULL)
729 goto error;
730 if (sizehint > 0) {
731 /* Need to complete the last line */
732 PyObject *rest = Util_GetLine(self, 0);
733 if (rest == NULL) {
734 Py_DECREF(line);
735 goto error;
736 }
737 PyString_Concat(&line, rest);
738 Py_DECREF(rest);
739 if (line == NULL)
740 goto error;
741 }
742 err = PyList_Append(list, line);
743 Py_DECREF(line);
744 if (err != 0)
745 goto error;
746 }
747
748 cleanup:
749 RELEASE_LOCK(self);
750 if (big_buffer) {
751 Py_DECREF(big_buffer);
752 }
753 return list;
754}
755
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000756PyDoc_STRVAR(BZ2File_xreadlines__doc__,
757"xreadlines() -> self\n\
758\n\
759For backward compatibility. BZ2File objects now include the performance\n\
760optimizations previously implemented in the xreadlines module.\n\
761");
762
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000763PyDoc_STRVAR(BZ2File_write__doc__,
764"write(data) -> None\n\
765\n\
766Write the 'data' string to file. Note that due to buffering, close() may\n\
767be needed before the file on disk reflects the data written.\n\
768");
769
770/* This is a hacked version of Python's fileobject.c:file_write(). */
771static PyObject *
772BZ2File_write(BZ2FileObject *self, PyObject *args)
773{
774 PyObject *ret = NULL;
775 char *buf;
776 int len;
777 int bzerror;
778
779 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
780 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000781
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000782 ACQUIRE_LOCK(self);
783 switch (self->mode) {
784 case MODE_WRITE:
785 break;
Tim Peterse3228092002-11-09 04:21:44 +0000786
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000787 case MODE_CLOSED:
788 PyErr_SetString(PyExc_ValueError,
789 "I/O operation on closed file");
790 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000791
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000792 default:
793 PyErr_SetString(PyExc_IOError,
794 "file is not ready for writing");
795 goto cleanup;;
796 }
797
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000798 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000799
800 Py_BEGIN_ALLOW_THREADS
801 BZ2_bzWrite (&bzerror, self->fp, buf, len);
802 self->pos += len;
803 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000804
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 if (bzerror != BZ_OK) {
806 Util_CatchBZ2Error(bzerror);
807 goto cleanup;
808 }
Tim Peterse3228092002-11-09 04:21:44 +0000809
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810 Py_INCREF(Py_None);
811 ret = Py_None;
812
813cleanup:
814 RELEASE_LOCK(self);
815 return ret;
816}
817
818PyDoc_STRVAR(BZ2File_writelines__doc__,
819"writelines(sequence_of_strings) -> None\n\
820\n\
821Write the sequence of strings to the file. Note that newlines are not\n\
822added. The sequence can be any iterable object producing strings. This is\n\
823equivalent to calling write() for each string.\n\
824");
825
826/* This is a hacked version of Python's fileobject.c:file_writelines(). */
827static PyObject *
828BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
829{
830#define CHUNKSIZE 1000
831 PyObject *list = NULL;
832 PyObject *iter = NULL;
833 PyObject *ret = NULL;
834 PyObject *line;
835 int i, j, index, len, islist;
836 int bzerror;
837
838 ACQUIRE_LOCK(self);
839 islist = PyList_Check(seq);
840 if (!islist) {
841 iter = PyObject_GetIter(seq);
842 if (iter == NULL) {
843 PyErr_SetString(PyExc_TypeError,
844 "writelines() requires an iterable argument");
845 goto error;
846 }
847 list = PyList_New(CHUNKSIZE);
848 if (list == NULL)
849 goto error;
850 }
851
852 /* Strategy: slurp CHUNKSIZE lines into a private list,
853 checking that they are all strings, then write that list
854 without holding the interpreter lock, then come back for more. */
855 for (index = 0; ; index += CHUNKSIZE) {
856 if (islist) {
857 Py_XDECREF(list);
858 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
859 if (list == NULL)
860 goto error;
861 j = PyList_GET_SIZE(list);
862 }
863 else {
864 for (j = 0; j < CHUNKSIZE; j++) {
865 line = PyIter_Next(iter);
866 if (line == NULL) {
867 if (PyErr_Occurred())
868 goto error;
869 break;
870 }
871 PyList_SetItem(list, j, line);
872 }
873 }
874 if (j == 0)
875 break;
876
877 /* Check that all entries are indeed strings. If not,
878 apply the same rules as for file.write() and
879 convert the rets to strings. This is slow, but
880 seems to be the only way since all conversion APIs
881 could potentially execute Python code. */
882 for (i = 0; i < j; i++) {
883 PyObject *v = PyList_GET_ITEM(list, i);
884 if (!PyString_Check(v)) {
885 const char *buffer;
886 int len;
887 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
888 PyErr_SetString(PyExc_TypeError,
889 "writelines() "
890 "argument must be "
891 "a sequence of "
892 "strings");
893 goto error;
894 }
895 line = PyString_FromStringAndSize(buffer,
896 len);
897 if (line == NULL)
898 goto error;
899 Py_DECREF(v);
900 PyList_SET_ITEM(list, i, line);
901 }
902 }
903
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000904 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000905
906 /* Since we are releasing the global lock, the
907 following code may *not* execute Python code. */
908 Py_BEGIN_ALLOW_THREADS
909 for (i = 0; i < j; i++) {
910 line = PyList_GET_ITEM(list, i);
911 len = PyString_GET_SIZE(line);
912 BZ2_bzWrite (&bzerror, self->fp,
913 PyString_AS_STRING(line), len);
914 if (bzerror != BZ_OK) {
915 Py_BLOCK_THREADS
916 Util_CatchBZ2Error(bzerror);
917 goto error;
918 }
919 }
920 Py_END_ALLOW_THREADS
921
922 if (j < CHUNKSIZE)
923 break;
924 }
925
926 Py_INCREF(Py_None);
927 ret = Py_None;
928
929 error:
930 RELEASE_LOCK(self);
931 Py_XDECREF(list);
932 Py_XDECREF(iter);
933 return ret;
934#undef CHUNKSIZE
935}
936
937PyDoc_STRVAR(BZ2File_seek__doc__,
938"seek(offset [, whence]) -> None\n\
939\n\
940Move to new file position. Argument offset is a byte count. Optional\n\
941argument whence defaults to 0 (offset from start of file, offset\n\
942should be >= 0); other values are 1 (move relative to current position,\n\
943positive or negative), and 2 (move relative to end of file, usually\n\
944negative, although many platforms allow seeking beyond the end of a file).\n\
945\n\
946Note that seeking of bz2 files is emulated, and depending on the parameters\n\
947the operation may be extremely slow.\n\
948");
949
950static PyObject *
951BZ2File_seek(BZ2FileObject *self, PyObject *args)
952{
953 int where = 0;
954 long offset;
955 char small_buffer[SMALLCHUNK];
956 char *buffer = small_buffer;
957 size_t buffersize = SMALLCHUNK;
958 int bytesread = 0;
959 int readsize;
960 int chunksize;
961 int bzerror;
962 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000963 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000964
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000965 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
966 return NULL;
967
968 ACQUIRE_LOCK(self);
969 Util_DropReadAhead(self);
970 switch (self->mode) {
971 case MODE_READ:
972 case MODE_READ_EOF:
973 break;
Tim Peterse3228092002-11-09 04:21:44 +0000974
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000975 case MODE_CLOSED:
976 PyErr_SetString(PyExc_ValueError,
977 "I/O operation on closed file");
978 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000979
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000980 default:
981 PyErr_SetString(PyExc_IOError,
982 "seek works only while reading");
983 goto cleanup;;
984 }
985
986 if (offset < 0) {
987 if (where == 1) {
988 offset = self->pos + offset;
989 rewind = 1;
990 } else if (where == 2) {
991 if (self->size == -1) {
992 assert(self->mode != MODE_READ_EOF);
993 for (;;) {
994 Py_BEGIN_ALLOW_THREADS
995 chunksize = Util_UnivNewlineRead(
996 &bzerror, self->fp,
997 buffer, buffersize,
998 self);
999 self->pos += chunksize;
1000 Py_END_ALLOW_THREADS
1001
1002 bytesread += chunksize;
1003 if (bzerror == BZ_STREAM_END) {
1004 break;
1005 } else if (bzerror != BZ_OK) {
1006 Util_CatchBZ2Error(bzerror);
1007 goto cleanup;
1008 }
1009 }
1010 self->mode = MODE_READ_EOF;
1011 self->size = self->pos;
1012 bytesread = 0;
1013 }
1014 offset = self->size + offset;
1015 if (offset >= self->pos)
1016 offset -= self->pos;
1017 else
1018 rewind = 1;
1019 }
1020 if (offset < 0)
1021 offset = 0;
1022 } else if (where == 0) {
1023 if (offset >= self->pos)
1024 offset -= self->pos;
1025 else
1026 rewind = 1;
1027 }
1028
1029 if (rewind) {
1030 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001031 if (bzerror != BZ_OK) {
1032 Util_CatchBZ2Error(bzerror);
1033 goto cleanup;
1034 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001035 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001036 if (!ret)
1037 goto cleanup;
1038 Py_DECREF(ret);
1039 ret = NULL;
1040 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001041 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001042 0, 0, NULL, 0);
1043 if (bzerror != BZ_OK) {
1044 Util_CatchBZ2Error(bzerror);
1045 goto cleanup;
1046 }
1047 self->mode = MODE_READ;
1048 } else if (self->mode == MODE_READ_EOF) {
1049 goto exit;
1050 }
1051
1052 if (offset == 0)
1053 goto exit;
1054
1055 /* Before getting here, offset must be set to the number of bytes
1056 * to walk forward. */
1057 for (;;) {
Tim Petersa17c0c42002-11-09 04:23:31 +00001058 if ((size_t)offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001059 readsize = buffersize;
1060 else
1061 readsize = offset-bytesread;
1062 Py_BEGIN_ALLOW_THREADS
1063 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1064 buffer, readsize, self);
1065 self->pos += chunksize;
1066 Py_END_ALLOW_THREADS
1067 bytesread += chunksize;
1068 if (bzerror == BZ_STREAM_END) {
1069 self->size = self->pos;
1070 self->mode = MODE_READ_EOF;
1071 break;
1072 } else if (bzerror != BZ_OK) {
1073 Util_CatchBZ2Error(bzerror);
1074 goto cleanup;
1075 }
1076 if (bytesread == offset)
1077 break;
1078 }
1079
1080exit:
1081 Py_INCREF(Py_None);
1082 ret = Py_None;
1083
1084cleanup:
1085 RELEASE_LOCK(self);
1086 return ret;
1087}
1088
1089PyDoc_STRVAR(BZ2File_tell__doc__,
1090"tell() -> int\n\
1091\n\
1092Return the current file position, an integer (may be a long integer).\n\
1093");
1094
1095static PyObject *
1096BZ2File_tell(BZ2FileObject *self, PyObject *args)
1097{
1098 PyObject *ret = NULL;
1099
1100 if (self->mode == MODE_CLOSED) {
1101 PyErr_SetString(PyExc_ValueError,
1102 "I/O operation on closed file");
1103 goto cleanup;
1104 }
1105
1106 ret = PyInt_FromLong(self->pos);
1107
1108cleanup:
1109 return ret;
1110}
1111
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001112PyDoc_STRVAR(BZ2File_close__doc__,
1113"close() -> None or (perhaps) an integer\n\
1114\n\
1115Close the file. Sets data attribute .closed to true. A closed file\n\
1116cannot be used for further I/O operations. close() may be called more\n\
1117than once without error.\n\
1118");
1119
1120static PyObject *
1121BZ2File_close(BZ2FileObject *self)
1122{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001123 PyObject *ret = NULL;
1124 int bzerror = BZ_OK;
1125
1126 ACQUIRE_LOCK(self);
1127 switch (self->mode) {
1128 case MODE_READ:
1129 case MODE_READ_EOF:
1130 BZ2_bzReadClose(&bzerror, self->fp);
1131 break;
1132 case MODE_WRITE:
1133 BZ2_bzWriteClose(&bzerror, self->fp,
1134 0, NULL, NULL);
1135 break;
1136 }
1137 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001138 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001139 if (bzerror != BZ_OK) {
1140 Util_CatchBZ2Error(bzerror);
1141 Py_XDECREF(ret);
1142 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001143 }
1144
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001145 RELEASE_LOCK(self);
1146 return ret;
1147}
1148
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001149static PyObject *BZ2File_getiter(BZ2FileObject *self);
1150
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001151static PyMethodDef BZ2File_methods[] = {
1152 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1153 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1154 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001155 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001156 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1157 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1158 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1159 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001160 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1161 {NULL, NULL} /* sentinel */
1162};
1163
1164
1165/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001166/* Getters and setters of BZ2File. */
1167
#ifdef WITH_UNIVERSAL_NEWLINES
/* Getter for the "newlines" attribute; a hacked version of Python's
 * fileobject.c:get_newlines().  Translates the NEWLINE_* bits stored in
 * self->f_newlinetypes into None, a single string, or a tuple of
 * strings.  Any other bit pattern raises SystemError. */
static PyObject *
BZ2File_get_newlines(BZ2FileObject *self, void *closure)
{
    const int seen = self->f_newlinetypes;

    if (seen == NEWLINE_UNKNOWN) {
        Py_INCREF(Py_None);
        return Py_None;
    }
    if (seen == NEWLINE_CR)
        return PyString_FromString("\r");
    if (seen == NEWLINE_LF)
        return PyString_FromString("\n");
    if (seen == (NEWLINE_CR|NEWLINE_LF))
        return Py_BuildValue("(ss)", "\r", "\n");
    if (seen == NEWLINE_CRLF)
        return PyString_FromString("\r\n");
    if (seen == (NEWLINE_CR|NEWLINE_CRLF))
        return Py_BuildValue("(ss)", "\r", "\r\n");
    if (seen == (NEWLINE_LF|NEWLINE_CRLF))
        return Py_BuildValue("(ss)", "\n", "\r\n");
    if (seen == (NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF))
        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");

    PyErr_Format(PyExc_SystemError,
                 "Unknown newlines value 0x%x\n",
                 seen);
    return NULL;
}
#endif
1199
1200static PyObject *
1201BZ2File_get_closed(BZ2FileObject *self, void *closure)
1202{
1203 return PyInt_FromLong(self->mode == MODE_CLOSED);
1204}
1205
1206static PyObject *
1207BZ2File_get_mode(BZ2FileObject *self, void *closure)
1208{
1209 return PyObject_GetAttrString(self->file, "mode");
1210}
1211
1212static PyObject *
1213BZ2File_get_name(BZ2FileObject *self, void *closure)
1214{
1215 return PyObject_GetAttrString(self->file, "name");
1216}
1217
1218static PyGetSetDef BZ2File_getset[] = {
1219 {"closed", (getter)BZ2File_get_closed, NULL,
1220 "True if the file is closed"},
1221#ifdef WITH_UNIVERSAL_NEWLINES
1222 {"newlines", (getter)BZ2File_get_newlines, NULL,
1223 "end-of-line convention used in this file"},
1224#endif
1225 {"mode", (getter)BZ2File_get_mode, NULL,
1226 "file mode ('r', 'w', or 'U')"},
1227 {"name", (getter)BZ2File_get_name, NULL,
1228 "file name"},
1229 {NULL} /* Sentinel */
1230};
1231
1232
1233/* ===================================================================== */
1234/* Members of BZ2File_Type. */
1235
1236#undef OFF
1237#define OFF(x) offsetof(BZ2FileObject, x)
1238
1239static PyMemberDef BZ2File_members[] = {
1240 {"softspace", T_INT, OFF(f_softspace), 0,
1241 "flag indicating that a space needs to be printed; used by print"},
1242 {NULL} /* Sentinel */
1243};
1244
1245/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001246/* Slot definitions for BZ2File_Type. */
1247
1248static int
1249BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1250{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001251 static char *kwlist[] = {"filename", "mode", "buffering",
1252 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001253 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001254 char *mode = "r";
1255 int buffering = -1;
1256 int compresslevel = 9;
1257 int bzerror;
1258 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001259
1260 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001261
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001262 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1263 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001264 &compresslevel))
1265 return -1;
1266
1267 if (compresslevel < 1 || compresslevel > 9) {
1268 PyErr_SetString(PyExc_ValueError,
1269 "compresslevel must be between 1 and 9");
1270 return -1;
1271 }
1272
1273 for (;;) {
1274 int error = 0;
1275 switch (*mode) {
1276 case 'r':
1277 case 'w':
1278 if (mode_char)
1279 error = 1;
1280 mode_char = *mode;
1281 break;
1282
1283 case 'b':
1284 break;
1285
1286 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001287 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001288 break;
1289
1290 default:
1291 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001292 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001293 }
1294 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001295 PyErr_Format(PyExc_ValueError,
1296 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001297 return -1;
1298 }
1299 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001300 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001301 break;
1302 }
1303
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001304 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001305
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001306 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1307 name, mode, buffering);
1308 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001309 return -1;
1310
1311 /* From now on, we have stuff to dealloc, so jump to error label
1312 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001313
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001314#ifdef WITH_THREAD
1315 self->lock = PyThread_allocate_lock();
1316 if (!self->lock)
1317 goto error;
1318#endif
1319
1320 if (mode_char == 'r')
1321 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001322 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001323 0, 0, NULL, 0);
1324 else
1325 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001326 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001327 compresslevel, 0, 0);
1328
1329 if (bzerror != BZ_OK) {
1330 Util_CatchBZ2Error(bzerror);
1331 goto error;
1332 }
1333
1334 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1335
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001336 return 0;
1337
1338error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001339 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001340#ifdef WITH_THREAD
1341 if (self->lock)
1342 PyThread_free_lock(self->lock);
1343#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001344 return -1;
1345}
1346
1347static void
1348BZ2File_dealloc(BZ2FileObject *self)
1349{
1350 int bzerror;
1351#ifdef WITH_THREAD
1352 if (self->lock)
1353 PyThread_free_lock(self->lock);
1354#endif
1355 switch (self->mode) {
1356 case MODE_READ:
1357 case MODE_READ_EOF:
1358 BZ2_bzReadClose(&bzerror, self->fp);
1359 break;
1360 case MODE_WRITE:
1361 BZ2_bzWriteClose(&bzerror, self->fp,
1362 0, NULL, NULL);
1363 break;
1364 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001365 Util_DropReadAhead(self);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001366 Py_DECREF(self->file);
1367 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001368}
1369
1370/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1371static PyObject *
1372BZ2File_getiter(BZ2FileObject *self)
1373{
1374 if (self->mode == MODE_CLOSED) {
1375 PyErr_SetString(PyExc_ValueError,
1376 "I/O operation on closed file");
1377 return NULL;
1378 }
1379 Py_INCREF((PyObject*)self);
1380 return (PyObject *)self;
1381}
1382
1383/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1384#define READAHEAD_BUFSIZE 8192
1385static PyObject *
1386BZ2File_iternext(BZ2FileObject *self)
1387{
1388 PyStringObject* ret;
1389 ACQUIRE_LOCK(self);
1390 if (self->mode == MODE_CLOSED) {
1391 PyErr_SetString(PyExc_ValueError,
1392 "I/O operation on closed file");
1393 return NULL;
1394 }
1395 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1396 RELEASE_LOCK(self);
1397 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1398 Py_XDECREF(ret);
1399 return NULL;
1400 }
1401 return (PyObject *)ret;
1402}
1403
1404/* ===================================================================== */
1405/* BZ2File_Type definition. */
1406
1407PyDoc_VAR(BZ2File__doc__) =
1408PyDoc_STR(
1409"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1410\n\
1411Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1412writing. When opened for writing, the file will be created if it doesn't\n\
1413exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1414unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1415is given, must be a number between 1 and 9.\n\
1416")
1417#ifdef WITH_UNIVERSAL_NEWLINES
1418PyDoc_STR(
1419"\n\
1420Add a 'U' to mode to open the file for input with universal newline\n\
1421support. Any line ending in the input file will be seen as a '\\n' in\n\
1422Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1423for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1424'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1425newlines are available only when reading.\n\
1426")
1427#endif
1428;
1429
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001430static PyTypeObject BZ2File_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001431 PyObject_HEAD_INIT(NULL)
1432 0, /*ob_size*/
1433 "bz2.BZ2File", /*tp_name*/
1434 sizeof(BZ2FileObject), /*tp_basicsize*/
1435 0, /*tp_itemsize*/
1436 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1437 0, /*tp_print*/
1438 0, /*tp_getattr*/
1439 0, /*tp_setattr*/
1440 0, /*tp_compare*/
1441 0, /*tp_repr*/
1442 0, /*tp_as_number*/
1443 0, /*tp_as_sequence*/
1444 0, /*tp_as_mapping*/
1445 0, /*tp_hash*/
1446 0, /*tp_call*/
1447 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001448 PyObject_GenericGetAttr,/*tp_getattro*/
1449 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001450 0, /*tp_as_buffer*/
1451 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1452 BZ2File__doc__, /*tp_doc*/
1453 0, /*tp_traverse*/
1454 0, /*tp_clear*/
1455 0, /*tp_richcompare*/
1456 0, /*tp_weaklistoffset*/
1457 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1458 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1459 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001460 BZ2File_members, /*tp_members*/
1461 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001462 0, /*tp_base*/
1463 0, /*tp_dict*/
1464 0, /*tp_descr_get*/
1465 0, /*tp_descr_set*/
1466 0, /*tp_dictoffset*/
1467 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001468 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001469 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001470 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001471 0, /*tp_is_gc*/
1472};
1473
1474
1475/* ===================================================================== */
1476/* Methods of BZ2Comp. */
1477
1478PyDoc_STRVAR(BZ2Comp_compress__doc__,
1479"compress(data) -> string\n\
1480\n\
1481Provide more data to the compressor object. It will return chunks of\n\
1482compressed data whenever possible. When you've finished providing data\n\
1483to compress, call the flush() method to finish the compression process,\n\
1484and return what is left in the internal buffers.\n\
1485");
1486
1487static PyObject *
1488BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1489{
1490 char *data;
1491 int datasize;
1492 int bufsize = SMALLCHUNK;
Tim Peters07f075c2002-11-09 04:26:02 +00001493 LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001494 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001495 bz_stream *bzs = &self->bzs;
1496 int bzerror;
1497
1498 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1499 return NULL;
1500
1501 ACQUIRE_LOCK(self);
1502 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001503 PyErr_SetString(PyExc_ValueError,
1504 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001505 goto error;
1506 }
1507
1508 ret = PyString_FromStringAndSize(NULL, bufsize);
1509 if (!ret)
1510 goto error;
1511
1512 bzs->next_in = data;
1513 bzs->avail_in = datasize;
1514 bzs->next_out = BUF(ret);
1515 bzs->avail_out = bufsize;
1516
1517 totalout = BZS_TOTAL_OUT(bzs);
1518
1519 for (;;) {
1520 Py_BEGIN_ALLOW_THREADS
1521 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1522 Py_END_ALLOW_THREADS
1523 if (bzerror != BZ_RUN_OK) {
1524 Util_CatchBZ2Error(bzerror);
1525 goto error;
1526 }
1527 if (bzs->avail_out == 0) {
1528 bufsize = Util_NewBufferSize(bufsize);
1529 if (_PyString_Resize(&ret, bufsize) < 0) {
1530 BZ2_bzCompressEnd(bzs);
1531 goto error;
1532 }
1533 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1534 - totalout);
1535 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1536 } else if (bzs->avail_in == 0) {
1537 break;
1538 }
1539 }
1540
Tim Petersf29f0c62002-11-09 04:28:17 +00001541 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001542
1543 RELEASE_LOCK(self);
1544 return ret;
1545
1546error:
1547 RELEASE_LOCK(self);
1548 Py_XDECREF(ret);
1549 return NULL;
1550}
1551
1552PyDoc_STRVAR(BZ2Comp_flush__doc__,
1553"flush() -> string\n\
1554\n\
1555Finish the compression process and return what is left in internal buffers.\n\
1556You must not use the compressor object after calling this method.\n\
1557");
1558
1559static PyObject *
1560BZ2Comp_flush(BZ2CompObject *self)
1561{
1562 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001563 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001564 bz_stream *bzs = &self->bzs;
Tim Peters2858e5e2002-11-09 04:30:08 +00001565 LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001566 int bzerror;
1567
1568 ACQUIRE_LOCK(self);
1569 if (!self->running) {
1570 PyErr_SetString(PyExc_ValueError, "object was already "
1571 "flushed");
1572 goto error;
1573 }
1574 self->running = 0;
1575
1576 ret = PyString_FromStringAndSize(NULL, bufsize);
1577 if (!ret)
1578 goto error;
1579
1580 bzs->next_out = BUF(ret);
1581 bzs->avail_out = bufsize;
1582
1583 totalout = BZS_TOTAL_OUT(bzs);
1584
1585 for (;;) {
1586 Py_BEGIN_ALLOW_THREADS
1587 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1588 Py_END_ALLOW_THREADS
1589 if (bzerror == BZ_STREAM_END) {
1590 break;
1591 } else if (bzerror != BZ_FINISH_OK) {
1592 Util_CatchBZ2Error(bzerror);
1593 goto error;
1594 }
1595 if (bzs->avail_out == 0) {
1596 bufsize = Util_NewBufferSize(bufsize);
1597 if (_PyString_Resize(&ret, bufsize) < 0)
1598 goto error;
1599 bzs->next_out = BUF(ret);
1600 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1601 - totalout);
1602 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1603 }
1604 }
1605
1606 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001607 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001608
1609 RELEASE_LOCK(self);
1610 return ret;
1611
1612error:
1613 RELEASE_LOCK(self);
1614 Py_XDECREF(ret);
1615 return NULL;
1616}
1617
1618static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001619 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1620 BZ2Comp_compress__doc__},
1621 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1622 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001623 {NULL, NULL} /* sentinel */
1624};
1625
1626
1627/* ===================================================================== */
1628/* Slot definitions for BZ2Comp_Type. */
1629
1630static int
1631BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1632{
1633 int compresslevel = 9;
1634 int bzerror;
1635 static char *kwlist[] = {"compresslevel", 0};
1636
1637 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1638 kwlist, &compresslevel))
1639 return -1;
1640
1641 if (compresslevel < 1 || compresslevel > 9) {
1642 PyErr_SetString(PyExc_ValueError,
1643 "compresslevel must be between 1 and 9");
1644 goto error;
1645 }
1646
1647#ifdef WITH_THREAD
1648 self->lock = PyThread_allocate_lock();
1649 if (!self->lock)
1650 goto error;
1651#endif
1652
1653 memset(&self->bzs, 0, sizeof(bz_stream));
1654 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1655 if (bzerror != BZ_OK) {
1656 Util_CatchBZ2Error(bzerror);
1657 goto error;
1658 }
1659
1660 self->running = 1;
1661
1662 return 0;
1663error:
1664#ifdef WITH_THREAD
1665 if (self->lock)
1666 PyThread_free_lock(self->lock);
1667#endif
1668 return -1;
1669}
1670
1671static void
1672BZ2Comp_dealloc(BZ2CompObject *self)
1673{
1674#ifdef WITH_THREAD
1675 if (self->lock)
1676 PyThread_free_lock(self->lock);
1677#endif
1678 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001679 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001680}
1681
1682
1683/* ===================================================================== */
1684/* BZ2Comp_Type definition. */
1685
1686PyDoc_STRVAR(BZ2Comp__doc__,
1687"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1688\n\
1689Create a new compressor object. This object may be used to compress\n\
1690data sequentially. If you want to compress data in one shot, use the\n\
1691compress() function instead. The compresslevel parameter, if given,\n\
1692must be a number between 1 and 9.\n\
1693");
1694
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001695static PyTypeObject BZ2Comp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001696 PyObject_HEAD_INIT(NULL)
1697 0, /*ob_size*/
1698 "bz2.BZ2Compressor", /*tp_name*/
1699 sizeof(BZ2CompObject), /*tp_basicsize*/
1700 0, /*tp_itemsize*/
1701 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1702 0, /*tp_print*/
1703 0, /*tp_getattr*/
1704 0, /*tp_setattr*/
1705 0, /*tp_compare*/
1706 0, /*tp_repr*/
1707 0, /*tp_as_number*/
1708 0, /*tp_as_sequence*/
1709 0, /*tp_as_mapping*/
1710 0, /*tp_hash*/
1711 0, /*tp_call*/
1712 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001713 PyObject_GenericGetAttr,/*tp_getattro*/
1714 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001715 0, /*tp_as_buffer*/
1716 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1717 BZ2Comp__doc__, /*tp_doc*/
1718 0, /*tp_traverse*/
1719 0, /*tp_clear*/
1720 0, /*tp_richcompare*/
1721 0, /*tp_weaklistoffset*/
1722 0, /*tp_iter*/
1723 0, /*tp_iternext*/
1724 BZ2Comp_methods, /*tp_methods*/
1725 0, /*tp_members*/
1726 0, /*tp_getset*/
1727 0, /*tp_base*/
1728 0, /*tp_dict*/
1729 0, /*tp_descr_get*/
1730 0, /*tp_descr_set*/
1731 0, /*tp_dictoffset*/
1732 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001733 PyType_GenericAlloc, /*tp_alloc*/
1734 PyType_GenericNew, /*tp_new*/
1735 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001736 0, /*tp_is_gc*/
1737};
1738
1739
1740/* ===================================================================== */
1741/* Members of BZ2Decomp. */
1742
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001743#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001744#define OFF(x) offsetof(BZ2DecompObject, x)
1745
1746static PyMemberDef BZ2Decomp_members[] = {
1747 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1748 {NULL} /* Sentinel */
1749};
1750
1751
1752/* ===================================================================== */
1753/* Methods of BZ2Decomp. */
1754
1755PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1756"decompress(data) -> string\n\
1757\n\
1758Provide more data to the decompressor object. It will return chunks\n\
1759of decompressed data whenever possible. If you try to decompress data\n\
1760after the end of stream is found, EOFError will be raised. If any data\n\
1761was found after the end of stream, it'll be ignored and saved in\n\
1762unused_data attribute.\n\
1763");
1764
1765static PyObject *
1766BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1767{
1768 char *data;
1769 int datasize;
1770 int bufsize = SMALLCHUNK;
Tim Peters39185d62002-11-09 04:31:38 +00001771 LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001772 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001773 bz_stream *bzs = &self->bzs;
1774 int bzerror;
1775
1776 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1777 return NULL;
1778
1779 ACQUIRE_LOCK(self);
1780 if (!self->running) {
1781 PyErr_SetString(PyExc_EOFError, "end of stream was "
1782 "already found");
1783 goto error;
1784 }
1785
1786 ret = PyString_FromStringAndSize(NULL, bufsize);
1787 if (!ret)
1788 goto error;
1789
1790 bzs->next_in = data;
1791 bzs->avail_in = datasize;
1792 bzs->next_out = BUF(ret);
1793 bzs->avail_out = bufsize;
1794
1795 totalout = BZS_TOTAL_OUT(bzs);
1796
1797 for (;;) {
1798 Py_BEGIN_ALLOW_THREADS
1799 bzerror = BZ2_bzDecompress(bzs);
1800 Py_END_ALLOW_THREADS
1801 if (bzerror == BZ_STREAM_END) {
1802 if (bzs->avail_in != 0) {
1803 Py_DECREF(self->unused_data);
1804 self->unused_data =
1805 PyString_FromStringAndSize(bzs->next_in,
1806 bzs->avail_in);
1807 }
1808 self->running = 0;
1809 break;
1810 }
1811 if (bzerror != BZ_OK) {
1812 Util_CatchBZ2Error(bzerror);
1813 goto error;
1814 }
1815 if (bzs->avail_out == 0) {
1816 bufsize = Util_NewBufferSize(bufsize);
1817 if (_PyString_Resize(&ret, bufsize) < 0) {
1818 BZ2_bzDecompressEnd(bzs);
1819 goto error;
1820 }
1821 bzs->next_out = BUF(ret);
1822 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1823 - totalout);
1824 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1825 } else if (bzs->avail_in == 0) {
1826 break;
1827 }
1828 }
1829
1830 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001831 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001832
1833 RELEASE_LOCK(self);
1834 return ret;
1835
1836error:
1837 RELEASE_LOCK(self);
1838 Py_XDECREF(ret);
1839 return NULL;
1840}
1841
1842static PyMethodDef BZ2Decomp_methods[] = {
1843 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1844 {NULL, NULL} /* sentinel */
1845};
1846
1847
1848/* ===================================================================== */
1849/* Slot definitions for BZ2Decomp_Type. */
1850
1851static int
1852BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1853{
1854 int bzerror;
1855
1856 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1857 return -1;
1858
1859#ifdef WITH_THREAD
1860 self->lock = PyThread_allocate_lock();
1861 if (!self->lock)
1862 goto error;
1863#endif
1864
1865 self->unused_data = PyString_FromString("");
1866 if (!self->unused_data)
1867 goto error;
1868
1869 memset(&self->bzs, 0, sizeof(bz_stream));
1870 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1871 if (bzerror != BZ_OK) {
1872 Util_CatchBZ2Error(bzerror);
1873 goto error;
1874 }
1875
1876 self->running = 1;
1877
1878 return 0;
1879
1880error:
1881#ifdef WITH_THREAD
1882 if (self->lock)
1883 PyThread_free_lock(self->lock);
1884#endif
1885 Py_XDECREF(self->unused_data);
1886 return -1;
1887}
1888
1889static void
1890BZ2Decomp_dealloc(BZ2DecompObject *self)
1891{
1892#ifdef WITH_THREAD
1893 if (self->lock)
1894 PyThread_free_lock(self->lock);
1895#endif
1896 Py_XDECREF(self->unused_data);
1897 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001898 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001899}
1900
1901
1902/* ===================================================================== */
1903/* BZ2Decomp_Type definition. */
1904
1905PyDoc_STRVAR(BZ2Decomp__doc__,
1906"BZ2Decompressor() -> decompressor object\n\
1907\n\
1908Create a new decompressor object. This object may be used to decompress\n\
1909data sequentially. If you want to decompress data in one shot, use the\n\
1910decompress() function instead.\n\
1911");
1912
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001913static PyTypeObject BZ2Decomp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001914 PyObject_HEAD_INIT(NULL)
1915 0, /*ob_size*/
1916 "bz2.BZ2Decompressor", /*tp_name*/
1917 sizeof(BZ2DecompObject), /*tp_basicsize*/
1918 0, /*tp_itemsize*/
1919 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1920 0, /*tp_print*/
1921 0, /*tp_getattr*/
1922 0, /*tp_setattr*/
1923 0, /*tp_compare*/
1924 0, /*tp_repr*/
1925 0, /*tp_as_number*/
1926 0, /*tp_as_sequence*/
1927 0, /*tp_as_mapping*/
1928 0, /*tp_hash*/
1929 0, /*tp_call*/
1930 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001931 PyObject_GenericGetAttr,/*tp_getattro*/
1932 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001933 0, /*tp_as_buffer*/
1934 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1935 BZ2Decomp__doc__, /*tp_doc*/
1936 0, /*tp_traverse*/
1937 0, /*tp_clear*/
1938 0, /*tp_richcompare*/
1939 0, /*tp_weaklistoffset*/
1940 0, /*tp_iter*/
1941 0, /*tp_iternext*/
1942 BZ2Decomp_methods, /*tp_methods*/
1943 BZ2Decomp_members, /*tp_members*/
1944 0, /*tp_getset*/
1945 0, /*tp_base*/
1946 0, /*tp_dict*/
1947 0, /*tp_descr_get*/
1948 0, /*tp_descr_set*/
1949 0, /*tp_dictoffset*/
1950 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001951 PyType_GenericAlloc, /*tp_alloc*/
1952 PyType_GenericNew, /*tp_new*/
1953 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001954 0, /*tp_is_gc*/
1955};
1956
1957
1958/* ===================================================================== */
1959/* Module functions. */
1960
1961PyDoc_STRVAR(bz2_compress__doc__,
1962"compress(data [, compresslevel=9]) -> string\n\
1963\n\
1964Compress data in one shot. If you want to compress data sequentially,\n\
1965use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1966given, must be a number between 1 and 9.\n\
1967");
1968
1969static PyObject *
1970bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1971{
1972 int compresslevel=9;
1973 char *data;
1974 int datasize;
1975 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001976 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001977 bz_stream _bzs;
1978 bz_stream *bzs = &_bzs;
1979 int bzerror;
1980 static char *kwlist[] = {"data", "compresslevel", 0};
1981
1982 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1983 kwlist, &data, &datasize,
1984 &compresslevel))
1985 return NULL;
1986
1987 if (compresslevel < 1 || compresslevel > 9) {
1988 PyErr_SetString(PyExc_ValueError,
1989 "compresslevel must be between 1 and 9");
1990 return NULL;
1991 }
1992
1993 /* Conforming to bz2 manual, this is large enough to fit compressed
1994 * data in one shot. We will check it later anyway. */
1995 bufsize = datasize + (datasize/100+1) + 600;
1996
1997 ret = PyString_FromStringAndSize(NULL, bufsize);
1998 if (!ret)
1999 return NULL;
2000
2001 memset(bzs, 0, sizeof(bz_stream));
2002
2003 bzs->next_in = data;
2004 bzs->avail_in = datasize;
2005 bzs->next_out = BUF(ret);
2006 bzs->avail_out = bufsize;
2007
2008 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2009 if (bzerror != BZ_OK) {
2010 Util_CatchBZ2Error(bzerror);
2011 Py_DECREF(ret);
2012 return NULL;
2013 }
Tim Peterse3228092002-11-09 04:21:44 +00002014
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002015 for (;;) {
2016 Py_BEGIN_ALLOW_THREADS
2017 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2018 Py_END_ALLOW_THREADS
2019 if (bzerror == BZ_STREAM_END) {
2020 break;
2021 } else if (bzerror != BZ_FINISH_OK) {
2022 BZ2_bzCompressEnd(bzs);
2023 Util_CatchBZ2Error(bzerror);
2024 Py_DECREF(ret);
2025 return NULL;
2026 }
2027 if (bzs->avail_out == 0) {
2028 bufsize = Util_NewBufferSize(bufsize);
2029 if (_PyString_Resize(&ret, bufsize) < 0) {
2030 BZ2_bzCompressEnd(bzs);
2031 Py_DECREF(ret);
2032 return NULL;
2033 }
2034 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2035 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2036 }
2037 }
2038
2039 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002040 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002041 BZ2_bzCompressEnd(bzs);
2042
2043 return ret;
2044}
2045
2046PyDoc_STRVAR(bz2_decompress__doc__,
2047"decompress(data) -> decompressed data\n\
2048\n\
2049Decompress data in one shot. If you want to decompress data sequentially,\n\
2050use an instance of BZ2Decompressor instead.\n\
2051");
2052
2053static PyObject *
2054bz2_decompress(PyObject *self, PyObject *args)
2055{
2056 char *data;
2057 int datasize;
2058 int bufsize = SMALLCHUNK;
2059 PyObject *ret;
2060 bz_stream _bzs;
2061 bz_stream *bzs = &_bzs;
2062 int bzerror;
2063
2064 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2065 return NULL;
2066
2067 if (datasize == 0)
2068 return PyString_FromString("");
2069
2070 ret = PyString_FromStringAndSize(NULL, bufsize);
2071 if (!ret)
2072 return NULL;
2073
2074 memset(bzs, 0, sizeof(bz_stream));
2075
2076 bzs->next_in = data;
2077 bzs->avail_in = datasize;
2078 bzs->next_out = BUF(ret);
2079 bzs->avail_out = bufsize;
2080
2081 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2082 if (bzerror != BZ_OK) {
2083 Util_CatchBZ2Error(bzerror);
2084 Py_DECREF(ret);
2085 return NULL;
2086 }
Tim Peterse3228092002-11-09 04:21:44 +00002087
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002088 for (;;) {
2089 Py_BEGIN_ALLOW_THREADS
2090 bzerror = BZ2_bzDecompress(bzs);
2091 Py_END_ALLOW_THREADS
2092 if (bzerror == BZ_STREAM_END) {
2093 break;
2094 } else if (bzerror != BZ_OK) {
2095 BZ2_bzDecompressEnd(bzs);
2096 Util_CatchBZ2Error(bzerror);
2097 Py_DECREF(ret);
2098 return NULL;
2099 }
2100 if (bzs->avail_out == 0) {
2101 bufsize = Util_NewBufferSize(bufsize);
2102 if (_PyString_Resize(&ret, bufsize) < 0) {
2103 BZ2_bzDecompressEnd(bzs);
2104 Py_DECREF(ret);
2105 return NULL;
2106 }
2107 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2108 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2109 } else if (bzs->avail_in == 0) {
2110 BZ2_bzDecompressEnd(bzs);
2111 PyErr_SetString(PyExc_ValueError,
2112 "couldn't find end of stream");
2113 Py_DECREF(ret);
2114 return NULL;
2115 }
2116 }
2117
2118 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002119 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002120 BZ2_bzDecompressEnd(bzs);
2121
2122 return ret;
2123}
2124
2125static PyMethodDef bz2_methods[] = {
2126 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2127 bz2_compress__doc__},
2128 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2129 bz2_decompress__doc__},
2130 {NULL, NULL} /* sentinel */
2131};
2132
2133/* ===================================================================== */
2134/* Initialization function. */
2135
2136PyDoc_STRVAR(bz2__doc__,
2137"The python bz2 module provides a comprehensive interface for\n\
2138the bz2 compression library. It implements a complete file\n\
2139interface, one shot (de)compression functions, and types for\n\
2140sequential (de)compression.\n\
2141");
2142
2143DL_EXPORT(void)
2144initbz2(void)
2145{
2146 PyObject *m;
2147
2148 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002149 BZ2Comp_Type.ob_type = &PyType_Type;
2150 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002151
2152 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2153
2154 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2155
2156 Py_INCREF(&BZ2File_Type);
2157 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2158
2159 Py_INCREF(&BZ2Comp_Type);
2160 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2161
2162 Py_INCREF(&BZ2Decomp_Type);
2163 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2164}