blob: 5800bff86e92f591d3387847784b5988d13b06bb [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Free-form authorship credit for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t: our very own off_t-like type, 64-bit if possible.
 * (Copied from Objects/fileobject.c.)
 *
 * off_t is used when large file support is not configured or when off_t
 * is already at least 8 bytes wide; otherwise fpos_t is used if it is
 * wide enough, and the build is rejected if neither type qualifies.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* BUF(v): pointer to the character data of the Python string object v.
 * The argument is parenthesized so that an expression argument is cast
 * as a whole rather than only its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2     /* reading, and end of stream already seen */
#define MODE_WRITE    3

/* True iff the type of v is exactly BZ2File_Type. */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
45
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* Compatibility layer selected by whether bzlib.h defines BZ_CONFIG_ERROR.
 *
 * BZS_TOTAL_OUT(bzs) yields the output byte count of the bz_stream that
 * bzs points to.  The macro argument is parenthesized everywhere so that
 * any pointer expression (e.g. &self->bzs) can be passed safely.
 */
#ifdef BZ_CONFIG_ERROR

/* The count is split into two 32-bit halves; combine them when a 64-bit
 * integer type is available, otherwise report only the low half. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Without BZ_CONFIG_ERROR the BZ2_-prefixed names used by this module are
 * mapped onto unprefixed functions (presumably an older libbzip2 API --
 * confirm against the targeted library version). */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object lock helpers.  With thread support they take/release the
 * `lock` member of the object (blocking acquire); without it they expand
 * to nothing.  The argument is parenthesized so any pointer expression
 * may be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR      1       /* \r newline seen */
#define NEWLINE_LF      2       /* \n newline seen */
#define NEWLINE_CRLF    4       /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
105 int f_softspace; /* Flag used by 'print' command */
106
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000110
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000111 BZFILE *fp;
112 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000113 Py_off_t pos;
114 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000115#ifdef WITH_THREAD
116 PyThread_type_lock lock;
117#endif
118} BZ2FileObject;
119
120typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2CompObject;
128
129typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134#ifdef WITH_THREAD
135 PyThread_type_lock lock;
136#endif
137} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* SMALLCHUNK: the larger of BUFSIZ and 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK: 512K when int has at least 4 bytes, otherwise 16K. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the next size to grow to:
 *   - up to and including SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK, up to and including BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;

    return currentsize + growth;
}
226
227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000238 int newlinetypes = f->f_newlinetypes;
239 int skipnextlf = f->f_skipnextlf;
240 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000241
242 total_v_size = n > 0 ? n : 100;
243 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
244 if (v == NULL)
245 return NULL;
246
247 buf = BUF(v);
248 end = buf + total_v_size;
249
250 for (;;) {
251 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (univ_newline) {
253 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000254 BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000256 if (bzerror != BZ_OK || buf == end)
257 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000261 /* Seeing a \n here with
262 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000263 * saw a \r before.
264 */
265 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000266 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000267 &c, 1);
268 if (bzerror != BZ_OK)
269 break;
270 } else {
271 newlinetypes |= NEWLINE_CR;
272 }
273 }
274 if (c == '\r') {
275 skipnextlf = 1;
276 c = '\n';
277 } else if ( c == '\n')
278 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (c == '\n') break;
281 }
282 if (bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000285 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000286 BZ2_bzRead(&bzerror, f->fp, &c, 1);
287 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 *buf++ = c;
289 } while (bzerror == BZ_OK && c != '\n' && buf != end);
290 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000291 f->f_newlinetypes = newlinetypes;
292 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000293 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000294 f->size = f->pos;
295 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000296 break;
297 } else if (bzerror != BZ_OK) {
298 Util_CatchBZ2Error(bzerror);
299 Py_DECREF(v);
300 return NULL;
301 }
302 if (c == '\n')
303 break;
304 /* Must be because buf == end */
305 if (n > 0)
306 break;
307 used_v_size = total_v_size;
308 increment = total_v_size >> 2; /* mild exponential growth */
309 total_v_size += increment;
310 if (total_v_size > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "line is longer than a Python string can hold");
313 Py_DECREF(v);
314 return NULL;
315 }
316 if (_PyString_Resize(&v, total_v_size) < 0)
317 return NULL;
318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
320 }
321
322 used_v_size = buf - BUF(v);
323 if (used_v_size != total_v_size)
324 _PyString_Resize(&v, used_v_size);
325 return v;
326}
327
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000328/* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
330size_t
331Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000332 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000333{
334 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000335 int newlinetypes, skipnextlf;
336
337 assert(buf != NULL);
338 assert(stream != NULL);
339
340 if (!f->f_univ_newline)
341 return BZ2_bzRead(bzerror, stream, buf, n);
342
343 newlinetypes = f->f_newlinetypes;
344 skipnextlf = f->f_skipnextlf;
345
346 /* Invariant: n is the number of bytes remaining to be filled
347 * in the buffer.
348 */
349 while (n) {
350 size_t nread;
351 int shortread;
352 char *src = dst;
353
354 nread = BZ2_bzRead(bzerror, stream, dst, n);
355 assert(nread <= n);
356 n -= nread; /* assuming 1 byte out for each in; will adjust */
357 shortread = n != 0; /* true iff EOF or error */
358 while (nread--) {
359 char c = *src++;
360 if (c == '\r') {
361 /* Save as LF and set flag to skip next LF. */
362 *dst++ = '\n';
363 skipnextlf = 1;
364 }
365 else if (skipnextlf && c == '\n') {
366 /* Skip LF, and remember we saw CR LF. */
367 skipnextlf = 0;
368 newlinetypes |= NEWLINE_CRLF;
369 ++n;
370 }
371 else {
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
375 */
376 if (c == '\n')
377 newlinetypes |= NEWLINE_LF;
378 else if (skipnextlf)
379 newlinetypes |= NEWLINE_CR;
380 *dst++ = c;
381 skipnextlf = 0;
382 }
383 }
384 if (shortread) {
385 /* If this is EOF, update type flags. */
386 if (skipnextlf && *bzerror == BZ_STREAM_END)
387 newlinetypes |= NEWLINE_CR;
388 break;
389 }
390 }
391 f->f_newlinetypes = newlinetypes;
392 f->f_skipnextlf = skipnextlf;
393 return dst - buf;
394}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000395
396/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000398Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000400 if (f->f_buf != NULL) {
401 PyMem_Free(f->f_buf);
402 f->f_buf = NULL;
403 }
404}
405
406/* This is a hacked version of Python's fileobject.c:readahead(). */
407static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000408Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409{
410 int chunksize;
411 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412
413 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000414 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000415 return 0;
416 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000417 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000418 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000420 f->f_bufptr = f->f_buf;
421 f->f_bufend = f->f_buf;
422 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000423 }
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000699 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000705 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000722 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000726 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000738 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000781PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782"xreadlines() -> self\n\
783\n\
784For backward compatibility. BZ2File objects now include the performance\n\
785optimizations previously implemented in the xreadlines module.\n\
786");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
803
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000804 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000806
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
Tim Peterse3228092002-11-09 04:21:44 +0000811
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000816
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
821 }
822
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000823 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000829
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
833 }
Tim Peterse3228092002-11-09 04:21:44 +0000834
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000835 Py_INCREF(Py_None);
836 ret = Py_None;
837
838cleanup:
839 RELEASE_LOCK(self);
840 return ret;
841}
842
843PyDoc_STRVAR(BZ2File_writelines__doc__,
844"writelines(sequence_of_strings) -> None\n\
845\n\
846Write the sequence of strings to the file. Note that newlines are not\n\
847added. The sequence can be any iterable object producing strings. This is\n\
848equivalent to calling write() for each string.\n\
849");
850
851/* This is a hacked version of Python's fileobject.c:file_writelines(). */
852static PyObject *
853BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
854{
855#define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
862
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
871 }
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
875 }
876
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
887 }
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
895 }
896 PyList_SetItem(list, j, line);
897 }
898 }
899 if (j == 0)
900 break;
901
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000911 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
919 }
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
926 }
927 }
928
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000929 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000930
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
943 }
944 }
945 Py_END_ALLOW_THREADS
946
947 if (j < CHUNKSIZE)
948 break;
949 }
950
951 Py_INCREF(Py_None);
952 ret = Py_None;
953
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959#undef CHUNKSIZE
960}
961
962PyDoc_STRVAR(BZ2File_seek__doc__,
963"seek(offset [, whence]) -> None\n\
964\n\
965Move to new file position. Argument offset is a byte count. Optional\n\
966argument whence defaults to 0 (offset from start of file, offset\n\
967should be >= 0); other values are 1 (move relative to current position,\n\
968positive or negative), and 2 (move relative to end of file, usually\n\
969negative, although many platforms allow seeking beyond the end of a file).\n\
970\n\
971Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972the operation may be extremely slow.\n\
973");
974
975static PyObject *
976BZ2File_seek(BZ2FileObject *self, PyObject *args)
977{
978 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000979 PyObject *offobj;
980 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000981 char small_buffer[SMALLCHUNK];
982 char *buffer = small_buffer;
983 size_t buffersize = SMALLCHUNK;
984 int bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000985 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986 int chunksize;
987 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000988 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000989
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000990 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
991 return NULL;
992#if !defined(HAVE_LARGEFILE_SUPPORT)
993 offset = PyInt_AsLong(offobj);
994#else
995 offset = PyLong_Check(offobj) ?
996 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
997#endif
998 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000999 return NULL;
1000
1001 ACQUIRE_LOCK(self);
1002 Util_DropReadAhead(self);
1003 switch (self->mode) {
1004 case MODE_READ:
1005 case MODE_READ_EOF:
1006 break;
Tim Peterse3228092002-11-09 04:21:44 +00001007
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001008 case MODE_CLOSED:
1009 PyErr_SetString(PyExc_ValueError,
1010 "I/O operation on closed file");
1011 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +00001012
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001013 default:
1014 PyErr_SetString(PyExc_IOError,
1015 "seek works only while reading");
1016 goto cleanup;;
1017 }
1018
Georg Brandl47fab922006-02-18 21:57:25 +00001019 if (where == 2) {
1020 if (self->size == -1) {
1021 assert(self->mode != MODE_READ_EOF);
1022 for (;;) {
1023 Py_BEGIN_ALLOW_THREADS
1024 chunksize = Util_UnivNewlineRead(
1025 &bzerror, self->fp,
1026 buffer, buffersize,
1027 self);
1028 self->pos += chunksize;
1029 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001030
Georg Brandl47fab922006-02-18 21:57:25 +00001031 bytesread += chunksize;
1032 if (bzerror == BZ_STREAM_END) {
1033 break;
1034 } else if (bzerror != BZ_OK) {
1035 Util_CatchBZ2Error(bzerror);
1036 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001037 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001038 }
Georg Brandl47fab922006-02-18 21:57:25 +00001039 self->mode = MODE_READ_EOF;
1040 self->size = self->pos;
1041 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001042 }
Georg Brandl47fab922006-02-18 21:57:25 +00001043 offset = self->size + offset;
1044 } else if (where == 1) {
1045 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001046 }
1047
Georg Brandl47fab922006-02-18 21:57:25 +00001048 /* Before getting here, offset must be the absolute position the file
1049 * pointer should be set to. */
1050
1051 if (offset >= self->pos) {
1052 /* we can move forward */
1053 offset -= self->pos;
1054 } else {
1055 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001057 if (bzerror != BZ_OK) {
1058 Util_CatchBZ2Error(bzerror);
1059 goto cleanup;
1060 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001061 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001062 if (!ret)
1063 goto cleanup;
1064 Py_DECREF(ret);
1065 ret = NULL;
1066 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001067 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001068 0, 0, NULL, 0);
1069 if (bzerror != BZ_OK) {
1070 Util_CatchBZ2Error(bzerror);
1071 goto cleanup;
1072 }
1073 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001074 }
1075
Georg Brandl47fab922006-02-18 21:57:25 +00001076 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001077 goto exit;
1078
1079 /* Before getting here, offset must be set to the number of bytes
1080 * to walk forward. */
1081 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001082 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001083 readsize = buffersize;
1084 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001085 /* offset might be wider that readsize, but the result
1086 * of the subtraction is bound by buffersize (see the
1087 * condition above). buffersize is 8192. */
1088 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001089 Py_BEGIN_ALLOW_THREADS
1090 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1091 buffer, readsize, self);
1092 self->pos += chunksize;
1093 Py_END_ALLOW_THREADS
1094 bytesread += chunksize;
1095 if (bzerror == BZ_STREAM_END) {
1096 self->size = self->pos;
1097 self->mode = MODE_READ_EOF;
1098 break;
1099 } else if (bzerror != BZ_OK) {
1100 Util_CatchBZ2Error(bzerror);
1101 goto cleanup;
1102 }
1103 if (bytesread == offset)
1104 break;
1105 }
1106
1107exit:
1108 Py_INCREF(Py_None);
1109 ret = Py_None;
1110
1111cleanup:
1112 RELEASE_LOCK(self);
1113 return ret;
1114}
1115
1116PyDoc_STRVAR(BZ2File_tell__doc__,
1117"tell() -> int\n\
1118\n\
1119Return the current file position, an integer (may be a long integer).\n\
1120");
1121
1122static PyObject *
1123BZ2File_tell(BZ2FileObject *self, PyObject *args)
1124{
1125 PyObject *ret = NULL;
1126
1127 if (self->mode == MODE_CLOSED) {
1128 PyErr_SetString(PyExc_ValueError,
1129 "I/O operation on closed file");
1130 goto cleanup;
1131 }
1132
Georg Brandla8bcecc2005-09-03 07:49:53 +00001133#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001134 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001135#else
1136 ret = PyLong_FromLongLong(self->pos);
1137#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001138
1139cleanup:
1140 return ret;
1141}
1142
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001143PyDoc_STRVAR(BZ2File_close__doc__,
1144"close() -> None or (perhaps) an integer\n\
1145\n\
1146Close the file. Sets data attribute .closed to true. A closed file\n\
1147cannot be used for further I/O operations. close() may be called more\n\
1148than once without error.\n\
1149");
1150
1151static PyObject *
1152BZ2File_close(BZ2FileObject *self)
1153{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154 PyObject *ret = NULL;
1155 int bzerror = BZ_OK;
1156
1157 ACQUIRE_LOCK(self);
1158 switch (self->mode) {
1159 case MODE_READ:
1160 case MODE_READ_EOF:
1161 BZ2_bzReadClose(&bzerror, self->fp);
1162 break;
1163 case MODE_WRITE:
1164 BZ2_bzWriteClose(&bzerror, self->fp,
1165 0, NULL, NULL);
1166 break;
1167 }
1168 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001169 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001170 if (bzerror != BZ_OK) {
1171 Util_CatchBZ2Error(bzerror);
1172 Py_XDECREF(ret);
1173 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001174 }
1175
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001176 RELEASE_LOCK(self);
1177 return ret;
1178}
1179
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001180static PyObject *BZ2File_getiter(BZ2FileObject *self);
1181
/* Method table for BZ2File objects: Python-visible name, C implementation,
 * calling convention and docstring for each method. */
static PyMethodDef BZ2File_methods[] = {
    {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
    {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
    {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
    /* NOTE(review): BZ2File_getiter is declared with a single (self)
     * parameter but is registered as METH_VARARGS here, so any arguments
     * passed to xreadlines() are silently ignored -- confirm whether
     * METH_NOARGS was intended. */
    {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
    {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
    {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
    {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
    {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
    {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
    {NULL,              NULL}           /* sentinel */
};
1194
1195
1196/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001197/* Getters and setters of BZ2File. */
1198
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001199/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1200static PyObject *
1201BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1202{
1203 switch (self->f_newlinetypes) {
1204 case NEWLINE_UNKNOWN:
1205 Py_INCREF(Py_None);
1206 return Py_None;
1207 case NEWLINE_CR:
1208 return PyString_FromString("\r");
1209 case NEWLINE_LF:
1210 return PyString_FromString("\n");
1211 case NEWLINE_CR|NEWLINE_LF:
1212 return Py_BuildValue("(ss)", "\r", "\n");
1213 case NEWLINE_CRLF:
1214 return PyString_FromString("\r\n");
1215 case NEWLINE_CR|NEWLINE_CRLF:
1216 return Py_BuildValue("(ss)", "\r", "\r\n");
1217 case NEWLINE_LF|NEWLINE_CRLF:
1218 return Py_BuildValue("(ss)", "\n", "\r\n");
1219 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1220 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1221 default:
1222 PyErr_Format(PyExc_SystemError,
1223 "Unknown newlines value 0x%x\n",
1224 self->f_newlinetypes);
1225 return NULL;
1226 }
1227}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001228
1229static PyObject *
1230BZ2File_get_closed(BZ2FileObject *self, void *closure)
1231{
1232 return PyInt_FromLong(self->mode == MODE_CLOSED);
1233}
1234
1235static PyObject *
1236BZ2File_get_mode(BZ2FileObject *self, void *closure)
1237{
1238 return PyObject_GetAttrString(self->file, "mode");
1239}
1240
1241static PyObject *
1242BZ2File_get_name(BZ2FileObject *self, void *closure)
1243{
1244 return PyObject_GetAttrString(self->file, "name");
1245}
1246
/* Computed attributes of BZ2File objects.  Every entry supplies a getter
 * and a NULL setter, so none of them can be assigned from Python. */
static PyGetSetDef BZ2File_getset[] = {
    {"closed", (getter)BZ2File_get_closed, NULL,
            "True if the file is closed"},
    {"newlines", (getter)BZ2File_get_newlines, NULL,
            "end-of-line convention used in this file"},
    {"mode", (getter)BZ2File_get_mode, NULL,
            "file mode ('r', 'w', or 'U')"},
    {"name", (getter)BZ2File_get_name, NULL,
            "file name"},
    {NULL}      /* Sentinel */
};
1258
1259
1260/* ===================================================================== */
1261/* Members of BZ2File_Type. */
1262
/* OFF(x) yields the byte offset of field x inside BZ2FileObject; it is
 * (re)defined here for the member table below. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields of BZ2FileObject exposed directly as Python attributes. */
static PyMemberDef BZ2File_members[] = {
    {"softspace",       T_INT,          OFF(f_softspace), 0,
     "flag indicating that a space needs to be printed; used by print"},
    {NULL}      /* Sentinel */
};
1271
1272/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001273/* Slot definitions for BZ2File_Type. */
1274
1275static int
1276BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1277{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001278 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001279 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001280 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001281 char *mode = "r";
1282 int buffering = -1;
1283 int compresslevel = 9;
1284 int bzerror;
1285 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001286
1287 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001288
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001289 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1290 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001291 &compresslevel))
1292 return -1;
1293
1294 if (compresslevel < 1 || compresslevel > 9) {
1295 PyErr_SetString(PyExc_ValueError,
1296 "compresslevel must be between 1 and 9");
1297 return -1;
1298 }
1299
1300 for (;;) {
1301 int error = 0;
1302 switch (*mode) {
1303 case 'r':
1304 case 'w':
1305 if (mode_char)
1306 error = 1;
1307 mode_char = *mode;
1308 break;
1309
1310 case 'b':
1311 break;
1312
1313 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001314#ifdef __VMS
1315 self->f_univ_newline = 0;
1316#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001317 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001318#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001319 break;
1320
1321 default:
1322 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001323 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001324 }
1325 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001326 PyErr_Format(PyExc_ValueError,
1327 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001328 return -1;
1329 }
1330 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001331 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001332 break;
1333 }
1334
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001335 if (mode_char == 0) {
1336 mode_char = 'r';
1337 }
1338
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001339 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001340
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001341 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1342 name, mode, buffering);
1343 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001344 return -1;
1345
1346 /* From now on, we have stuff to dealloc, so jump to error label
1347 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001348
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001349#ifdef WITH_THREAD
1350 self->lock = PyThread_allocate_lock();
1351 if (!self->lock)
1352 goto error;
1353#endif
1354
1355 if (mode_char == 'r')
1356 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001357 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001358 0, 0, NULL, 0);
1359 else
1360 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001361 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001362 compresslevel, 0, 0);
1363
1364 if (bzerror != BZ_OK) {
1365 Util_CatchBZ2Error(bzerror);
1366 goto error;
1367 }
1368
1369 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1370
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001371 return 0;
1372
1373error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001374 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001375#ifdef WITH_THREAD
1376 if (self->lock)
1377 PyThread_free_lock(self->lock);
1378#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001379 return -1;
1380}
1381
1382static void
1383BZ2File_dealloc(BZ2FileObject *self)
1384{
1385 int bzerror;
1386#ifdef WITH_THREAD
1387 if (self->lock)
1388 PyThread_free_lock(self->lock);
1389#endif
1390 switch (self->mode) {
1391 case MODE_READ:
1392 case MODE_READ_EOF:
1393 BZ2_bzReadClose(&bzerror, self->fp);
1394 break;
1395 case MODE_WRITE:
1396 BZ2_bzWriteClose(&bzerror, self->fp,
1397 0, NULL, NULL);
1398 break;
1399 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001400 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001401 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001402 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001403}
1404
1405/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1406static PyObject *
1407BZ2File_getiter(BZ2FileObject *self)
1408{
1409 if (self->mode == MODE_CLOSED) {
1410 PyErr_SetString(PyExc_ValueError,
1411 "I/O operation on closed file");
1412 return NULL;
1413 }
1414 Py_INCREF((PyObject*)self);
1415 return (PyObject *)self;
1416}
1417
1418/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1419#define READAHEAD_BUFSIZE 8192
1420static PyObject *
1421BZ2File_iternext(BZ2FileObject *self)
1422{
1423 PyStringObject* ret;
1424 ACQUIRE_LOCK(self);
1425 if (self->mode == MODE_CLOSED) {
1426 PyErr_SetString(PyExc_ValueError,
1427 "I/O operation on closed file");
1428 return NULL;
1429 }
1430 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1431 RELEASE_LOCK(self);
1432 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1433 Py_XDECREF(ret);
1434 return NULL;
1435 }
1436 return (PyObject *)ret;
1437}
1438
1439/* ===================================================================== */
1440/* BZ2File_Type definition. */
1441
/* Docstring of the BZ2File type, assembled from two adjacent PyDoc_STR
 * pieces.
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init initialises its local `buffering` to -1 -- confirm which
 * default is the intended one. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;

/* Type object for bz2.BZ2File.  Slots are positional; the trailing
 * comment on each line names the slot being filled.  The type is
 * subclassable (Py_TPFLAGS_BASETYPE) and acts as its own iterator via
 * tp_iter/tp_iternext. */
static PyTypeObject BZ2File_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2File",          /*tp_name*/
    sizeof(BZ2FileObject),  /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2File_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2File__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    (getiterfunc)BZ2File_getiter, /*tp_iter*/
    (iternextfunc)BZ2File_iternext, /*tp_iternext*/
    BZ2File_methods,        /*tp_methods*/
    BZ2File_members,        /*tp_members*/
    BZ2File_getset,         /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2File_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1506
1507
1508/* ===================================================================== */
1509/* Methods of BZ2Comp. */
1510
1511PyDoc_STRVAR(BZ2Comp_compress__doc__,
1512"compress(data) -> string\n\
1513\n\
1514Provide more data to the compressor object. It will return chunks of\n\
1515compressed data whenever possible. When you've finished providing data\n\
1516to compress, call the flush() method to finish the compression process,\n\
1517and return what is left in the internal buffers.\n\
1518");
1519
1520static PyObject *
1521BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1522{
1523 char *data;
1524 int datasize;
1525 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001526 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001527 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001528 bz_stream *bzs = &self->bzs;
1529 int bzerror;
1530
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001531 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001532 return NULL;
1533
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001534 if (datasize == 0)
1535 return PyString_FromString("");
1536
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001537 ACQUIRE_LOCK(self);
1538 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001539 PyErr_SetString(PyExc_ValueError,
1540 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001541 goto error;
1542 }
1543
1544 ret = PyString_FromStringAndSize(NULL, bufsize);
1545 if (!ret)
1546 goto error;
1547
1548 bzs->next_in = data;
1549 bzs->avail_in = datasize;
1550 bzs->next_out = BUF(ret);
1551 bzs->avail_out = bufsize;
1552
1553 totalout = BZS_TOTAL_OUT(bzs);
1554
1555 for (;;) {
1556 Py_BEGIN_ALLOW_THREADS
1557 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1558 Py_END_ALLOW_THREADS
1559 if (bzerror != BZ_RUN_OK) {
1560 Util_CatchBZ2Error(bzerror);
1561 goto error;
1562 }
1563 if (bzs->avail_out == 0) {
1564 bufsize = Util_NewBufferSize(bufsize);
1565 if (_PyString_Resize(&ret, bufsize) < 0) {
1566 BZ2_bzCompressEnd(bzs);
1567 goto error;
1568 }
1569 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1570 - totalout);
1571 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1572 } else if (bzs->avail_in == 0) {
1573 break;
1574 }
1575 }
1576
Neal Norwitz047f3c72006-06-12 02:06:42 +00001577 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001578
1579 RELEASE_LOCK(self);
1580 return ret;
1581
1582error:
1583 RELEASE_LOCK(self);
1584 Py_XDECREF(ret);
1585 return NULL;
1586}
1587
1588PyDoc_STRVAR(BZ2Comp_flush__doc__,
1589"flush() -> string\n\
1590\n\
1591Finish the compression process and return what is left in internal buffers.\n\
1592You must not use the compressor object after calling this method.\n\
1593");
1594
1595static PyObject *
1596BZ2Comp_flush(BZ2CompObject *self)
1597{
1598 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001599 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001600 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001601 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001602 int bzerror;
1603
1604 ACQUIRE_LOCK(self);
1605 if (!self->running) {
1606 PyErr_SetString(PyExc_ValueError, "object was already "
1607 "flushed");
1608 goto error;
1609 }
1610 self->running = 0;
1611
1612 ret = PyString_FromStringAndSize(NULL, bufsize);
1613 if (!ret)
1614 goto error;
1615
1616 bzs->next_out = BUF(ret);
1617 bzs->avail_out = bufsize;
1618
1619 totalout = BZS_TOTAL_OUT(bzs);
1620
1621 for (;;) {
1622 Py_BEGIN_ALLOW_THREADS
1623 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1624 Py_END_ALLOW_THREADS
1625 if (bzerror == BZ_STREAM_END) {
1626 break;
1627 } else if (bzerror != BZ_FINISH_OK) {
1628 Util_CatchBZ2Error(bzerror);
1629 goto error;
1630 }
1631 if (bzs->avail_out == 0) {
1632 bufsize = Util_NewBufferSize(bufsize);
1633 if (_PyString_Resize(&ret, bufsize) < 0)
1634 goto error;
1635 bzs->next_out = BUF(ret);
1636 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1637 - totalout);
1638 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1639 }
1640 }
1641
1642 if (bzs->avail_out != 0)
Neal Norwitz047f3c72006-06-12 02:06:42 +00001643 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001644
1645 RELEASE_LOCK(self);
1646 return ret;
1647
1648error:
1649 RELEASE_LOCK(self);
1650 Py_XDECREF(ret);
1651 return NULL;
1652}
1653
/* Method table for BZ2Compressor objects: compress() takes an argument
 * tuple, flush() takes no arguments. */
static PyMethodDef BZ2Comp_methods[] = {
    {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
     BZ2Comp_compress__doc__},
    {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
     BZ2Comp_flush__doc__},
    {NULL,              NULL}           /* sentinel */
};
1661
1662
1663/* ===================================================================== */
1664/* Slot definitions for BZ2Comp_Type. */
1665
1666static int
1667BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1668{
1669 int compresslevel = 9;
1670 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001671 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001672
1673 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1674 kwlist, &compresslevel))
1675 return -1;
1676
1677 if (compresslevel < 1 || compresslevel > 9) {
1678 PyErr_SetString(PyExc_ValueError,
1679 "compresslevel must be between 1 and 9");
1680 goto error;
1681 }
1682
1683#ifdef WITH_THREAD
1684 self->lock = PyThread_allocate_lock();
1685 if (!self->lock)
1686 goto error;
1687#endif
1688
1689 memset(&self->bzs, 0, sizeof(bz_stream));
1690 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1691 if (bzerror != BZ_OK) {
1692 Util_CatchBZ2Error(bzerror);
1693 goto error;
1694 }
1695
1696 self->running = 1;
1697
1698 return 0;
1699error:
1700#ifdef WITH_THREAD
1701 if (self->lock)
1702 PyThread_free_lock(self->lock);
1703#endif
1704 return -1;
1705}
1706
1707static void
1708BZ2Comp_dealloc(BZ2CompObject *self)
1709{
1710#ifdef WITH_THREAD
1711 if (self->lock)
1712 PyThread_free_lock(self->lock);
1713#endif
1714 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001715 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001716}
1717
1718
1719/* ===================================================================== */
1720/* BZ2Comp_Type definition. */
1721
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for bz2.BZ2Compressor.  Slots are positional; the trailing
 * comment on each line names the slot being filled.  The type is
 * subclassable (Py_TPFLAGS_BASETYPE) and defines methods only -- no
 * members, getset entries or iterator slots. */
static PyTypeObject BZ2Comp_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2Compressor",    /*tp_name*/
    sizeof(BZ2CompObject),  /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Comp__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Comp_methods,        /*tp_methods*/
    0,                      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Comp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1774
1775
1776/* ===================================================================== */
1777/* Members of BZ2Decomp. */
1778
/* OFF(x) is redefined to yield field offsets inside BZ2DecompObject for
 * the member table below. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Struct fields of BZ2DecompObject exposed as Python attributes;
 * unused_data is flagged RO (read-only). */
static PyMemberDef BZ2Decomp_members[] = {
    {"unused_data", T_OBJECT, OFF(unused_data), RO},
    {NULL}      /* Sentinel */
};
1786
1787
1788/* ===================================================================== */
1789/* Methods of BZ2Decomp. */
1790
1791PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1792"decompress(data) -> string\n\
1793\n\
1794Provide more data to the decompressor object. It will return chunks\n\
1795of decompressed data whenever possible. If you try to decompress data\n\
1796after the end of stream is found, EOFError will be raised. If any data\n\
1797was found after the end of stream, it'll be ignored and saved in\n\
1798unused_data attribute.\n\
1799");
1800
1801static PyObject *
1802BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1803{
1804 char *data;
1805 int datasize;
1806 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001807 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001808 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001809 bz_stream *bzs = &self->bzs;
1810 int bzerror;
1811
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001812 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001813 return NULL;
1814
1815 ACQUIRE_LOCK(self);
1816 if (!self->running) {
1817 PyErr_SetString(PyExc_EOFError, "end of stream was "
1818 "already found");
1819 goto error;
1820 }
1821
1822 ret = PyString_FromStringAndSize(NULL, bufsize);
1823 if (!ret)
1824 goto error;
1825
1826 bzs->next_in = data;
1827 bzs->avail_in = datasize;
1828 bzs->next_out = BUF(ret);
1829 bzs->avail_out = bufsize;
1830
1831 totalout = BZS_TOTAL_OUT(bzs);
1832
1833 for (;;) {
1834 Py_BEGIN_ALLOW_THREADS
1835 bzerror = BZ2_bzDecompress(bzs);
1836 Py_END_ALLOW_THREADS
1837 if (bzerror == BZ_STREAM_END) {
1838 if (bzs->avail_in != 0) {
1839 Py_DECREF(self->unused_data);
1840 self->unused_data =
1841 PyString_FromStringAndSize(bzs->next_in,
1842 bzs->avail_in);
1843 }
1844 self->running = 0;
1845 break;
1846 }
1847 if (bzerror != BZ_OK) {
1848 Util_CatchBZ2Error(bzerror);
1849 goto error;
1850 }
1851 if (bzs->avail_out == 0) {
1852 bufsize = Util_NewBufferSize(bufsize);
1853 if (_PyString_Resize(&ret, bufsize) < 0) {
1854 BZ2_bzDecompressEnd(bzs);
1855 goto error;
1856 }
1857 bzs->next_out = BUF(ret);
1858 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1859 - totalout);
1860 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1861 } else if (bzs->avail_in == 0) {
1862 break;
1863 }
1864 }
1865
1866 if (bzs->avail_out != 0)
Neal Norwitz047f3c72006-06-12 02:06:42 +00001867 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001868
1869 RELEASE_LOCK(self);
1870 return ret;
1871
1872error:
1873 RELEASE_LOCK(self);
1874 Py_XDECREF(ret);
1875 return NULL;
1876}
1877
/* Method table for BZ2Decompressor objects; decompress() is the only
 * method. */
static PyMethodDef BZ2Decomp_methods[] = {
    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
    {NULL,              NULL}           /* sentinel */
};
1882
1883
1884/* ===================================================================== */
1885/* Slot definitions for BZ2Decomp_Type. */
1886
1887static int
1888BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1889{
1890 int bzerror;
1891
1892 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1893 return -1;
1894
1895#ifdef WITH_THREAD
1896 self->lock = PyThread_allocate_lock();
1897 if (!self->lock)
1898 goto error;
1899#endif
1900
1901 self->unused_data = PyString_FromString("");
1902 if (!self->unused_data)
1903 goto error;
1904
1905 memset(&self->bzs, 0, sizeof(bz_stream));
1906 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1907 if (bzerror != BZ_OK) {
1908 Util_CatchBZ2Error(bzerror);
1909 goto error;
1910 }
1911
1912 self->running = 1;
1913
1914 return 0;
1915
1916error:
1917#ifdef WITH_THREAD
1918 if (self->lock)
1919 PyThread_free_lock(self->lock);
1920#endif
1921 Py_XDECREF(self->unused_data);
1922 return -1;
1923}
1924
1925static void
1926BZ2Decomp_dealloc(BZ2DecompObject *self)
1927{
1928#ifdef WITH_THREAD
1929 if (self->lock)
1930 PyThread_free_lock(self->lock);
1931#endif
1932 Py_XDECREF(self->unused_data);
1933 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001934 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001935}
1936
1937
1938/* ===================================================================== */
1939/* BZ2Decomp_Type definition. */
1940
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.  Slots are positional; the
 * trailing comment on each line names the slot being filled.  The type
 * is subclassable (Py_TPFLAGS_BASETYPE) and defines a method table and a
 * member table (unused_data). */
static PyTypeObject BZ2Decomp_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2Decompressor",  /*tp_name*/
    sizeof(BZ2DecompObject), /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Decomp__doc__,       /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Decomp_methods,      /*tp_methods*/
    BZ2Decomp_members,      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Decomp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1992
1993
1994/* ===================================================================== */
1995/* Module functions. */
1996
1997PyDoc_STRVAR(bz2_compress__doc__,
1998"compress(data [, compresslevel=9]) -> string\n\
1999\n\
2000Compress data in one shot. If you want to compress data sequentially,\n\
2001use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2002given, must be a number between 1 and 9.\n\
2003");
2004
2005static PyObject *
2006bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2007{
2008 int compresslevel=9;
2009 char *data;
2010 int datasize;
2011 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002012 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002013 bz_stream _bzs;
2014 bz_stream *bzs = &_bzs;
2015 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002016 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002017
2018 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2019 kwlist, &data, &datasize,
2020 &compresslevel))
2021 return NULL;
2022
2023 if (compresslevel < 1 || compresslevel > 9) {
2024 PyErr_SetString(PyExc_ValueError,
2025 "compresslevel must be between 1 and 9");
2026 return NULL;
2027 }
2028
2029 /* Conforming to bz2 manual, this is large enough to fit compressed
2030 * data in one shot. We will check it later anyway. */
2031 bufsize = datasize + (datasize/100+1) + 600;
2032
2033 ret = PyString_FromStringAndSize(NULL, bufsize);
2034 if (!ret)
2035 return NULL;
2036
2037 memset(bzs, 0, sizeof(bz_stream));
2038
2039 bzs->next_in = data;
2040 bzs->avail_in = datasize;
2041 bzs->next_out = BUF(ret);
2042 bzs->avail_out = bufsize;
2043
2044 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2045 if (bzerror != BZ_OK) {
2046 Util_CatchBZ2Error(bzerror);
2047 Py_DECREF(ret);
2048 return NULL;
2049 }
Tim Peterse3228092002-11-09 04:21:44 +00002050
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002051 for (;;) {
2052 Py_BEGIN_ALLOW_THREADS
2053 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2054 Py_END_ALLOW_THREADS
2055 if (bzerror == BZ_STREAM_END) {
2056 break;
2057 } else if (bzerror != BZ_FINISH_OK) {
2058 BZ2_bzCompressEnd(bzs);
2059 Util_CatchBZ2Error(bzerror);
2060 Py_DECREF(ret);
2061 return NULL;
2062 }
2063 if (bzs->avail_out == 0) {
2064 bufsize = Util_NewBufferSize(bufsize);
2065 if (_PyString_Resize(&ret, bufsize) < 0) {
2066 BZ2_bzCompressEnd(bzs);
2067 Py_DECREF(ret);
2068 return NULL;
2069 }
2070 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2071 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2072 }
2073 }
2074
2075 if (bzs->avail_out != 0)
Neal Norwitz047f3c72006-06-12 02:06:42 +00002076 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002077 BZ2_bzCompressEnd(bzs);
2078
2079 return ret;
2080}
2081
2082PyDoc_STRVAR(bz2_decompress__doc__,
2083"decompress(data) -> decompressed data\n\
2084\n\
2085Decompress data in one shot. If you want to decompress data sequentially,\n\
2086use an instance of BZ2Decompressor instead.\n\
2087");
2088
2089static PyObject *
2090bz2_decompress(PyObject *self, PyObject *args)
2091{
2092 char *data;
2093 int datasize;
2094 int bufsize = SMALLCHUNK;
2095 PyObject *ret;
2096 bz_stream _bzs;
2097 bz_stream *bzs = &_bzs;
2098 int bzerror;
2099
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002100 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002101 return NULL;
2102
2103 if (datasize == 0)
2104 return PyString_FromString("");
2105
2106 ret = PyString_FromStringAndSize(NULL, bufsize);
2107 if (!ret)
2108 return NULL;
2109
2110 memset(bzs, 0, sizeof(bz_stream));
2111
2112 bzs->next_in = data;
2113 bzs->avail_in = datasize;
2114 bzs->next_out = BUF(ret);
2115 bzs->avail_out = bufsize;
2116
2117 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2118 if (bzerror != BZ_OK) {
2119 Util_CatchBZ2Error(bzerror);
2120 Py_DECREF(ret);
2121 return NULL;
2122 }
Tim Peterse3228092002-11-09 04:21:44 +00002123
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002124 for (;;) {
2125 Py_BEGIN_ALLOW_THREADS
2126 bzerror = BZ2_bzDecompress(bzs);
2127 Py_END_ALLOW_THREADS
2128 if (bzerror == BZ_STREAM_END) {
2129 break;
2130 } else if (bzerror != BZ_OK) {
2131 BZ2_bzDecompressEnd(bzs);
2132 Util_CatchBZ2Error(bzerror);
2133 Py_DECREF(ret);
2134 return NULL;
2135 }
2136 if (bzs->avail_out == 0) {
2137 bufsize = Util_NewBufferSize(bufsize);
2138 if (_PyString_Resize(&ret, bufsize) < 0) {
2139 BZ2_bzDecompressEnd(bzs);
2140 Py_DECREF(ret);
2141 return NULL;
2142 }
2143 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2144 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2145 } else if (bzs->avail_in == 0) {
2146 BZ2_bzDecompressEnd(bzs);
2147 PyErr_SetString(PyExc_ValueError,
2148 "couldn't find end of stream");
2149 Py_DECREF(ret);
2150 return NULL;
2151 }
2152 }
2153
2154 if (bzs->avail_out != 0)
Neal Norwitz047f3c72006-06-12 02:06:42 +00002155 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002156 BZ2_bzDecompressEnd(bzs);
2157
2158 return ret;
2159}
2160
2161static PyMethodDef bz2_methods[] = {
2162 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2163 bz2_compress__doc__},
2164 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2165 bz2_decompress__doc__},
2166 {NULL, NULL} /* sentinel */
2167};
2168
2169/* ===================================================================== */
2170/* Initialization function. */
2171
2172PyDoc_STRVAR(bz2__doc__,
2173"The python bz2 module provides a comprehensive interface for\n\
2174the bz2 compression library. It implements a complete file\n\
2175interface, one shot (de)compression functions, and types for\n\
2176sequential (de)compression.\n\
2177");
2178
Neal Norwitz21d896c2003-07-01 20:15:21 +00002179PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002180initbz2(void)
2181{
2182 PyObject *m;
2183
2184 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002185 BZ2Comp_Type.ob_type = &PyType_Type;
2186 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002187
2188 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002189 if (m == NULL)
2190 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002191
2192 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2193
2194 Py_INCREF(&BZ2File_Type);
2195 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2196
2197 Py_INCREF(&BZ2Comp_Type);
2198 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2199
2200 Py_INCREF(&BZ2Decomp_Type);
2201 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2202}