blob: db9367546c552a1a17cdffd3a019657a1ab7269d [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* Py_off_t: file-offset type used for BZ2FileObject positions; 64 bits wide
 * when the platform offers such a type.  The selection logic mirrors the
 * one in Objects/fileobject.c.
 */
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8
#  if SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#  else
#    error "Large file support, but neither off_t nor fpos_t is large enough."
#  endif
#else
/* Either large files are not supported, or off_t is already wide enough. */
typedef off_t Py_off_t;
#endif
36
/* Character data of the string object v.  The argument is parenthesized so
 * that an expression argument is cast as a whole. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0  /* I/O on the file raises ValueError */
#define MODE_READ     1  /* open for reading, end of stream not yet seen */
#define MODE_READ_EOF 2  /* open for reading, BZ_STREAM_END already seen */
#define MODE_WRITE    3  /* open for writing */

/* Exact type check (subclasses of BZ2File_Type do not match). */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
45
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
#ifdef BZ_CONFIG_ERROR

/* bzlib exposes the output counter as two 32-bit halves; combine them into
 * the widest integer type available.  Every variant is a fully
 * parenthesized expression (no trailing semicolon) so that it can be used
 * inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type: only the low 32 bits can be reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Without BZ_CONFIG_ERROR (presumably an older bzlib -- confirm), the
 * library entry points are spelled without the BZ2_ prefix and the stream
 * has a single total_out counter.  Map the prefixed names used in this
 * file onto them. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking.  With thread support the object's `lock` member is
 * acquired (blocking) / released; without it both macros expand to nothing.
 * The argument is parenthesized so any pointer expression may be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Bit flags OR-ed into BZ2FileObject.f_newlinetypes as line endings are
 * encountered while reading in universal-newline mode. */
#define NEWLINE_UNKNOWN 0x0  /* no newline seen yet */
#define NEWLINE_CR      0x1  /* a lone \r was seen */
#define NEWLINE_LF      0x2  /* a lone \n was seen */
#define NEWLINE_CRLF    0x4  /* a \r\n pair was seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
105 int f_softspace; /* Flag used by 'print' command */
106
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000110
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000111 BZFILE *fp;
112 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000113 Py_off_t pos;
114 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000115#ifdef WITH_THREAD
116 PyThread_type_lock lock;
117#endif
118} BZ2FileObject;
119
120typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2CompObject;
128
129typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134#ifdef WITH_THREAD
135 PyThread_type_lock lock;
136#endif
137} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* SMALLCHUNK: the larger of BUFSIZ and 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK: upper bound on a single growth step. */
#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* Return the next buffer size to use after `currentsize`.
 *
 * Adapted from new_buffersize() in Python's fileobject.c.  Sizes up to
 * SMALLCHUNK grow by SMALLCHUNK, sizes up to BIGCHUNK double, and anything
 * larger grows by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;     /* doubling phase */
    else
        step = BIGCHUNK;        /* linear phase */
    return currentsize + step;
}
226
227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000238 int newlinetypes = f->f_newlinetypes;
239 int skipnextlf = f->f_skipnextlf;
240 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000241
242 total_v_size = n > 0 ? n : 100;
243 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
244 if (v == NULL)
245 return NULL;
246
247 buf = BUF(v);
248 end = buf + total_v_size;
249
250 for (;;) {
251 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (univ_newline) {
253 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000254 BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000256 if (bzerror != BZ_OK || buf == end)
257 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000261 /* Seeing a \n here with
262 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000263 * saw a \r before.
264 */
265 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000266 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000267 &c, 1);
268 if (bzerror != BZ_OK)
269 break;
270 } else {
271 newlinetypes |= NEWLINE_CR;
272 }
273 }
274 if (c == '\r') {
275 skipnextlf = 1;
276 c = '\n';
277 } else if ( c == '\n')
278 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (c == '\n') break;
281 }
282 if (bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000285 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000286 BZ2_bzRead(&bzerror, f->fp, &c, 1);
287 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 *buf++ = c;
289 } while (bzerror == BZ_OK && c != '\n' && buf != end);
290 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000291 f->f_newlinetypes = newlinetypes;
292 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000293 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000294 f->size = f->pos;
295 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000296 break;
297 } else if (bzerror != BZ_OK) {
298 Util_CatchBZ2Error(bzerror);
299 Py_DECREF(v);
300 return NULL;
301 }
302 if (c == '\n')
303 break;
304 /* Must be because buf == end */
305 if (n > 0)
306 break;
307 used_v_size = total_v_size;
308 increment = total_v_size >> 2; /* mild exponential growth */
309 total_v_size += increment;
310 if (total_v_size > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "line is longer than a Python string can hold");
313 Py_DECREF(v);
314 return NULL;
315 }
316 if (_PyString_Resize(&v, total_v_size) < 0)
317 return NULL;
318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
320 }
321
322 used_v_size = buf - BUF(v);
323 if (used_v_size != total_v_size)
324 _PyString_Resize(&v, used_v_size);
325 return v;
326}
327
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000328/* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
330size_t
331Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000332 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000333{
334 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000335 int newlinetypes, skipnextlf;
336
337 assert(buf != NULL);
338 assert(stream != NULL);
339
340 if (!f->f_univ_newline)
341 return BZ2_bzRead(bzerror, stream, buf, n);
342
343 newlinetypes = f->f_newlinetypes;
344 skipnextlf = f->f_skipnextlf;
345
346 /* Invariant: n is the number of bytes remaining to be filled
347 * in the buffer.
348 */
349 while (n) {
350 size_t nread;
351 int shortread;
352 char *src = dst;
353
354 nread = BZ2_bzRead(bzerror, stream, dst, n);
355 assert(nread <= n);
356 n -= nread; /* assuming 1 byte out for each in; will adjust */
357 shortread = n != 0; /* true iff EOF or error */
358 while (nread--) {
359 char c = *src++;
360 if (c == '\r') {
361 /* Save as LF and set flag to skip next LF. */
362 *dst++ = '\n';
363 skipnextlf = 1;
364 }
365 else if (skipnextlf && c == '\n') {
366 /* Skip LF, and remember we saw CR LF. */
367 skipnextlf = 0;
368 newlinetypes |= NEWLINE_CRLF;
369 ++n;
370 }
371 else {
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
375 */
376 if (c == '\n')
377 newlinetypes |= NEWLINE_LF;
378 else if (skipnextlf)
379 newlinetypes |= NEWLINE_CR;
380 *dst++ = c;
381 skipnextlf = 0;
382 }
383 }
384 if (shortread) {
385 /* If this is EOF, update type flags. */
386 if (skipnextlf && *bzerror == BZ_STREAM_END)
387 newlinetypes |= NEWLINE_CR;
388 break;
389 }
390 }
391 f->f_newlinetypes = newlinetypes;
392 f->f_skipnextlf = skipnextlf;
393 return dst - buf;
394}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000395
396/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000398Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000400 if (f->f_buf != NULL) {
401 PyMem_Free(f->f_buf);
402 f->f_buf = NULL;
403 }
404}
405
406/* This is a hacked version of Python's fileobject.c:readahead(). */
407static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000408Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409{
410 int chunksize;
411 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412
413 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000414 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000415 return 0;
416 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000417 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000418 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000420 f->f_bufptr = f->f_buf;
421 f->f_bufend = f->f_buf;
422 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000423 }
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000699 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000705 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000722 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000726 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000738 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000781PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782"xreadlines() -> self\n\
783\n\
784For backward compatibility. BZ2File objects now include the performance\n\
785optimizations previously implemented in the xreadlines module.\n\
786");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
803
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000804 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000806
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
Tim Peterse3228092002-11-09 04:21:44 +0000811
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000816
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
821 }
822
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000823 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000829
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
833 }
Tim Peterse3228092002-11-09 04:21:44 +0000834
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000835 Py_INCREF(Py_None);
836 ret = Py_None;
837
838cleanup:
839 RELEASE_LOCK(self);
840 return ret;
841}
842
843PyDoc_STRVAR(BZ2File_writelines__doc__,
844"writelines(sequence_of_strings) -> None\n\
845\n\
846Write the sequence of strings to the file. Note that newlines are not\n\
847added. The sequence can be any iterable object producing strings. This is\n\
848equivalent to calling write() for each string.\n\
849");
850
851/* This is a hacked version of Python's fileobject.c:file_writelines(). */
852static PyObject *
853BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
854{
855#define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
862
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
871 }
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
875 }
876
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
887 }
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
895 }
896 PyList_SetItem(list, j, line);
897 }
898 }
899 if (j == 0)
900 break;
901
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
911 int len;
912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
919 }
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
926 }
927 }
928
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000929 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000930
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
943 }
944 }
945 Py_END_ALLOW_THREADS
946
947 if (j < CHUNKSIZE)
948 break;
949 }
950
951 Py_INCREF(Py_None);
952 ret = Py_None;
953
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959#undef CHUNKSIZE
960}
961
962PyDoc_STRVAR(BZ2File_seek__doc__,
963"seek(offset [, whence]) -> None\n\
964\n\
965Move to new file position. Argument offset is a byte count. Optional\n\
966argument whence defaults to 0 (offset from start of file, offset\n\
967should be >= 0); other values are 1 (move relative to current position,\n\
968positive or negative), and 2 (move relative to end of file, usually\n\
969negative, although many platforms allow seeking beyond the end of a file).\n\
970\n\
971Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972the operation may be extremely slow.\n\
973");
974
975static PyObject *
976BZ2File_seek(BZ2FileObject *self, PyObject *args)
977{
978 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000979 PyObject *offobj;
980 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000981 char small_buffer[SMALLCHUNK];
982 char *buffer = small_buffer;
983 size_t buffersize = SMALLCHUNK;
984 int bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000985 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986 int chunksize;
987 int bzerror;
988 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000989 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000990
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000991 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
992 return NULL;
993#if !defined(HAVE_LARGEFILE_SUPPORT)
994 offset = PyInt_AsLong(offobj);
995#else
996 offset = PyLong_Check(offobj) ?
997 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
998#endif
999 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 return NULL;
1001
1002 ACQUIRE_LOCK(self);
1003 Util_DropReadAhead(self);
1004 switch (self->mode) {
1005 case MODE_READ:
1006 case MODE_READ_EOF:
1007 break;
Tim Peterse3228092002-11-09 04:21:44 +00001008
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001009 case MODE_CLOSED:
1010 PyErr_SetString(PyExc_ValueError,
1011 "I/O operation on closed file");
1012 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +00001013
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001014 default:
1015 PyErr_SetString(PyExc_IOError,
1016 "seek works only while reading");
1017 goto cleanup;;
1018 }
1019
1020 if (offset < 0) {
1021 if (where == 1) {
1022 offset = self->pos + offset;
1023 rewind = 1;
1024 } else if (where == 2) {
1025 if (self->size == -1) {
1026 assert(self->mode != MODE_READ_EOF);
1027 for (;;) {
1028 Py_BEGIN_ALLOW_THREADS
1029 chunksize = Util_UnivNewlineRead(
1030 &bzerror, self->fp,
1031 buffer, buffersize,
1032 self);
1033 self->pos += chunksize;
1034 Py_END_ALLOW_THREADS
1035
1036 bytesread += chunksize;
1037 if (bzerror == BZ_STREAM_END) {
1038 break;
1039 } else if (bzerror != BZ_OK) {
1040 Util_CatchBZ2Error(bzerror);
1041 goto cleanup;
1042 }
1043 }
1044 self->mode = MODE_READ_EOF;
1045 self->size = self->pos;
1046 bytesread = 0;
1047 }
1048 offset = self->size + offset;
1049 if (offset >= self->pos)
1050 offset -= self->pos;
1051 else
1052 rewind = 1;
1053 }
1054 if (offset < 0)
1055 offset = 0;
1056 } else if (where == 0) {
1057 if (offset >= self->pos)
1058 offset -= self->pos;
1059 else
1060 rewind = 1;
1061 }
1062
1063 if (rewind) {
1064 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001065 if (bzerror != BZ_OK) {
1066 Util_CatchBZ2Error(bzerror);
1067 goto cleanup;
1068 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001069 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 if (!ret)
1071 goto cleanup;
1072 Py_DECREF(ret);
1073 ret = NULL;
1074 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001075 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001076 0, 0, NULL, 0);
1077 if (bzerror != BZ_OK) {
1078 Util_CatchBZ2Error(bzerror);
1079 goto cleanup;
1080 }
1081 self->mode = MODE_READ;
1082 } else if (self->mode == MODE_READ_EOF) {
1083 goto exit;
1084 }
1085
1086 if (offset == 0)
1087 goto exit;
1088
1089 /* Before getting here, offset must be set to the number of bytes
1090 * to walk forward. */
1091 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001092 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001093 readsize = buffersize;
1094 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001095 /* offset might be wider that readsize, but the result
1096 * of the subtraction is bound by buffersize (see the
1097 * condition above). buffersize is 8192. */
1098 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001099 Py_BEGIN_ALLOW_THREADS
1100 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1101 buffer, readsize, self);
1102 self->pos += chunksize;
1103 Py_END_ALLOW_THREADS
1104 bytesread += chunksize;
1105 if (bzerror == BZ_STREAM_END) {
1106 self->size = self->pos;
1107 self->mode = MODE_READ_EOF;
1108 break;
1109 } else if (bzerror != BZ_OK) {
1110 Util_CatchBZ2Error(bzerror);
1111 goto cleanup;
1112 }
1113 if (bytesread == offset)
1114 break;
1115 }
1116
1117exit:
1118 Py_INCREF(Py_None);
1119 ret = Py_None;
1120
1121cleanup:
1122 RELEASE_LOCK(self);
1123 return ret;
1124}
1125
1126PyDoc_STRVAR(BZ2File_tell__doc__,
1127"tell() -> int\n\
1128\n\
1129Return the current file position, an integer (may be a long integer).\n\
1130");
1131
1132static PyObject *
1133BZ2File_tell(BZ2FileObject *self, PyObject *args)
1134{
1135 PyObject *ret = NULL;
1136
1137 if (self->mode == MODE_CLOSED) {
1138 PyErr_SetString(PyExc_ValueError,
1139 "I/O operation on closed file");
1140 goto cleanup;
1141 }
1142
Georg Brandla8bcecc2005-09-03 07:49:53 +00001143#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001144 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001145#else
1146 ret = PyLong_FromLongLong(self->pos);
1147#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001148
1149cleanup:
1150 return ret;
1151}
1152
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001153PyDoc_STRVAR(BZ2File_close__doc__,
1154"close() -> None or (perhaps) an integer\n\
1155\n\
1156Close the file. Sets data attribute .closed to true. A closed file\n\
1157cannot be used for further I/O operations. close() may be called more\n\
1158than once without error.\n\
1159");
1160
1161static PyObject *
1162BZ2File_close(BZ2FileObject *self)
1163{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001164 PyObject *ret = NULL;
1165 int bzerror = BZ_OK;
1166
1167 ACQUIRE_LOCK(self);
1168 switch (self->mode) {
1169 case MODE_READ:
1170 case MODE_READ_EOF:
1171 BZ2_bzReadClose(&bzerror, self->fp);
1172 break;
1173 case MODE_WRITE:
1174 BZ2_bzWriteClose(&bzerror, self->fp,
1175 0, NULL, NULL);
1176 break;
1177 }
1178 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001179 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001180 if (bzerror != BZ_OK) {
1181 Util_CatchBZ2Error(bzerror);
1182 Py_XDECREF(ret);
1183 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001184 }
1185
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001186 RELEASE_LOCK(self);
1187 return ret;
1188}
1189
/* Forward declaration: BZ2File_getiter is defined further down but is
 * already needed by the method table below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table of BZ2File.
 * NOTE(review): "xreadlines" is registered as METH_VARARGS although
 * BZ2File_getiter only takes self -- confirm whether METH_NOARGS was
 * intended. */
static PyMethodDef BZ2File_methods[] = {
    {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
    {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
    {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
    {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
    {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
    {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
    {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
    {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
    {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
    {NULL, NULL} /* sentinel */
};
1204
1205
1206/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001207/* Getters and setters of BZ2File. */
1208
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001209/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1210static PyObject *
1211BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1212{
1213 switch (self->f_newlinetypes) {
1214 case NEWLINE_UNKNOWN:
1215 Py_INCREF(Py_None);
1216 return Py_None;
1217 case NEWLINE_CR:
1218 return PyString_FromString("\r");
1219 case NEWLINE_LF:
1220 return PyString_FromString("\n");
1221 case NEWLINE_CR|NEWLINE_LF:
1222 return Py_BuildValue("(ss)", "\r", "\n");
1223 case NEWLINE_CRLF:
1224 return PyString_FromString("\r\n");
1225 case NEWLINE_CR|NEWLINE_CRLF:
1226 return Py_BuildValue("(ss)", "\r", "\r\n");
1227 case NEWLINE_LF|NEWLINE_CRLF:
1228 return Py_BuildValue("(ss)", "\n", "\r\n");
1229 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1230 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1231 default:
1232 PyErr_Format(PyExc_SystemError,
1233 "Unknown newlines value 0x%x\n",
1234 self->f_newlinetypes);
1235 return NULL;
1236 }
1237}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001238
1239static PyObject *
1240BZ2File_get_closed(BZ2FileObject *self, void *closure)
1241{
1242 return PyInt_FromLong(self->mode == MODE_CLOSED);
1243}
1244
1245static PyObject *
1246BZ2File_get_mode(BZ2FileObject *self, void *closure)
1247{
1248 return PyObject_GetAttrString(self->file, "mode");
1249}
1250
1251static PyObject *
1252BZ2File_get_name(BZ2FileObject *self, void *closure)
1253{
1254 return PyObject_GetAttrString(self->file, "name");
1255}
1256
/* Computed attributes of BZ2File.  Every entry has a NULL setter, so
 * all four are read-only.
 * NOTE(review): the "mode" docstring lists 'r', 'w' and 'U', but the
 * getter returns the mode of the underlying file object, which
 * BZ2File_init opens as "rb" or "wb" -- confirm the wording. */
static PyGetSetDef BZ2File_getset[] = {
    {"closed", (getter)BZ2File_get_closed, NULL,
            "True if the file is closed"},
    {"newlines", (getter)BZ2File_get_newlines, NULL,
            "end-of-line convention used in this file"},
    {"mode", (getter)BZ2File_get_mode, NULL,
            "file mode ('r', 'w', or 'U')"},
    {"name", (getter)BZ2File_get_name, NULL,
            "file name"},
    {NULL}      /* Sentinel */
};
1268
1269
1270/* ===================================================================== */
1271/* Members of BZ2File_Type. */
1272
/* OFF(x) is the byte offset of field x inside BZ2FileObject; it is
 * redefined here because the macro is reused for other object types. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields exposed directly as attributes.  "softspace" maps to
 * the f_softspace int and is writable (flags field is 0). */
static PyMemberDef BZ2File_members[] = {
    {"softspace", T_INT, OFF(f_softspace), 0,
     "flag indicating that a space needs to be printed; used by print"},
    {NULL}      /* Sentinel */
};
1281
1282/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001283/* Slot definitions for BZ2File_Type. */
1284
1285static int
1286BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1287{
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001288 static const char *kwlist[] = {"filename", "mode", "buffering",
1289 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001290 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001291 char *mode = "r";
1292 int buffering = -1;
1293 int compresslevel = 9;
1294 int bzerror;
1295 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001296
1297 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001298
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001299 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1300 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001301 &compresslevel))
1302 return -1;
1303
1304 if (compresslevel < 1 || compresslevel > 9) {
1305 PyErr_SetString(PyExc_ValueError,
1306 "compresslevel must be between 1 and 9");
1307 return -1;
1308 }
1309
1310 for (;;) {
1311 int error = 0;
1312 switch (*mode) {
1313 case 'r':
1314 case 'w':
1315 if (mode_char)
1316 error = 1;
1317 mode_char = *mode;
1318 break;
1319
1320 case 'b':
1321 break;
1322
1323 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001324 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001325 break;
1326
1327 default:
1328 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001329 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001330 }
1331 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001332 PyErr_Format(PyExc_ValueError,
1333 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001334 return -1;
1335 }
1336 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001337 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001338 break;
1339 }
1340
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001341 if (mode_char == 0) {
1342 mode_char = 'r';
1343 }
1344
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001345 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001346
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001347 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1348 name, mode, buffering);
1349 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001350 return -1;
1351
1352 /* From now on, we have stuff to dealloc, so jump to error label
1353 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001354
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001355#ifdef WITH_THREAD
1356 self->lock = PyThread_allocate_lock();
1357 if (!self->lock)
1358 goto error;
1359#endif
1360
1361 if (mode_char == 'r')
1362 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001363 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001364 0, 0, NULL, 0);
1365 else
1366 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001367 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001368 compresslevel, 0, 0);
1369
1370 if (bzerror != BZ_OK) {
1371 Util_CatchBZ2Error(bzerror);
1372 goto error;
1373 }
1374
1375 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1376
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001377 return 0;
1378
1379error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001380 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001381#ifdef WITH_THREAD
1382 if (self->lock)
1383 PyThread_free_lock(self->lock);
1384#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001385 return -1;
1386}
1387
1388static void
1389BZ2File_dealloc(BZ2FileObject *self)
1390{
1391 int bzerror;
1392#ifdef WITH_THREAD
1393 if (self->lock)
1394 PyThread_free_lock(self->lock);
1395#endif
1396 switch (self->mode) {
1397 case MODE_READ:
1398 case MODE_READ_EOF:
1399 BZ2_bzReadClose(&bzerror, self->fp);
1400 break;
1401 case MODE_WRITE:
1402 BZ2_bzWriteClose(&bzerror, self->fp,
1403 0, NULL, NULL);
1404 break;
1405 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001406 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001407 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001408 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001409}
1410
1411/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1412static PyObject *
1413BZ2File_getiter(BZ2FileObject *self)
1414{
1415 if (self->mode == MODE_CLOSED) {
1416 PyErr_SetString(PyExc_ValueError,
1417 "I/O operation on closed file");
1418 return NULL;
1419 }
1420 Py_INCREF((PyObject*)self);
1421 return (PyObject *)self;
1422}
1423
1424/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1425#define READAHEAD_BUFSIZE 8192
1426static PyObject *
1427BZ2File_iternext(BZ2FileObject *self)
1428{
1429 PyStringObject* ret;
1430 ACQUIRE_LOCK(self);
1431 if (self->mode == MODE_CLOSED) {
1432 PyErr_SetString(PyExc_ValueError,
1433 "I/O operation on closed file");
1434 return NULL;
1435 }
1436 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1437 RELEASE_LOCK(self);
1438 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1439 Py_XDECREF(ret);
1440 return NULL;
1441 }
1442 return (PyObject *)ret;
1443}
1444
1445/* ===================================================================== */
1446/* BZ2File_Type definition. */
1447
/* Docstring of the BZ2File type, built from two PyDoc_STR pieces.
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init initialises its buffering local to -1 -- confirm which
 * one is intended. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;

/* Type object for bz2.BZ2File.  Instances come from the generic
 * allocator/constructor; all set-up is done by BZ2File_init and all
 * tear-down by BZ2File_dealloc.  The type is its own iterator
 * (tp_iter/tp_iternext) and may be subclassed (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2File_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2File",          /*tp_name*/
    sizeof(BZ2FileObject),  /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2File_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2File__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    (getiterfunc)BZ2File_getiter, /*tp_iter*/
    (iternextfunc)BZ2File_iternext, /*tp_iternext*/
    BZ2File_methods,        /*tp_methods*/
    BZ2File_members,        /*tp_members*/
    BZ2File_getset,         /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2File_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1512
1513
1514/* ===================================================================== */
1515/* Methods of BZ2Comp. */
1516
1517PyDoc_STRVAR(BZ2Comp_compress__doc__,
1518"compress(data) -> string\n\
1519\n\
1520Provide more data to the compressor object. It will return chunks of\n\
1521compressed data whenever possible. When you've finished providing data\n\
1522to compress, call the flush() method to finish the compression process,\n\
1523and return what is left in the internal buffers.\n\
1524");
1525
1526static PyObject *
1527BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1528{
1529 char *data;
1530 int datasize;
1531 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001532 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001533 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001534 bz_stream *bzs = &self->bzs;
1535 int bzerror;
1536
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001537 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001538 return NULL;
1539
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001540 if (datasize == 0)
1541 return PyString_FromString("");
1542
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001543 ACQUIRE_LOCK(self);
1544 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001545 PyErr_SetString(PyExc_ValueError,
1546 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001547 goto error;
1548 }
1549
1550 ret = PyString_FromStringAndSize(NULL, bufsize);
1551 if (!ret)
1552 goto error;
1553
1554 bzs->next_in = data;
1555 bzs->avail_in = datasize;
1556 bzs->next_out = BUF(ret);
1557 bzs->avail_out = bufsize;
1558
1559 totalout = BZS_TOTAL_OUT(bzs);
1560
1561 for (;;) {
1562 Py_BEGIN_ALLOW_THREADS
1563 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1564 Py_END_ALLOW_THREADS
1565 if (bzerror != BZ_RUN_OK) {
1566 Util_CatchBZ2Error(bzerror);
1567 goto error;
1568 }
1569 if (bzs->avail_out == 0) {
1570 bufsize = Util_NewBufferSize(bufsize);
1571 if (_PyString_Resize(&ret, bufsize) < 0) {
1572 BZ2_bzCompressEnd(bzs);
1573 goto error;
1574 }
1575 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1576 - totalout);
1577 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1578 } else if (bzs->avail_in == 0) {
1579 break;
1580 }
1581 }
1582
Tim Petersf29f0c62002-11-09 04:28:17 +00001583 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001584
1585 RELEASE_LOCK(self);
1586 return ret;
1587
1588error:
1589 RELEASE_LOCK(self);
1590 Py_XDECREF(ret);
1591 return NULL;
1592}
1593
1594PyDoc_STRVAR(BZ2Comp_flush__doc__,
1595"flush() -> string\n\
1596\n\
1597Finish the compression process and return what is left in internal buffers.\n\
1598You must not use the compressor object after calling this method.\n\
1599");
1600
1601static PyObject *
1602BZ2Comp_flush(BZ2CompObject *self)
1603{
1604 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001605 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001606 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001607 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001608 int bzerror;
1609
1610 ACQUIRE_LOCK(self);
1611 if (!self->running) {
1612 PyErr_SetString(PyExc_ValueError, "object was already "
1613 "flushed");
1614 goto error;
1615 }
1616 self->running = 0;
1617
1618 ret = PyString_FromStringAndSize(NULL, bufsize);
1619 if (!ret)
1620 goto error;
1621
1622 bzs->next_out = BUF(ret);
1623 bzs->avail_out = bufsize;
1624
1625 totalout = BZS_TOTAL_OUT(bzs);
1626
1627 for (;;) {
1628 Py_BEGIN_ALLOW_THREADS
1629 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1630 Py_END_ALLOW_THREADS
1631 if (bzerror == BZ_STREAM_END) {
1632 break;
1633 } else if (bzerror != BZ_FINISH_OK) {
1634 Util_CatchBZ2Error(bzerror);
1635 goto error;
1636 }
1637 if (bzs->avail_out == 0) {
1638 bufsize = Util_NewBufferSize(bufsize);
1639 if (_PyString_Resize(&ret, bufsize) < 0)
1640 goto error;
1641 bzs->next_out = BUF(ret);
1642 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1643 - totalout);
1644 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1645 }
1646 }
1647
1648 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001649 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001650
1651 RELEASE_LOCK(self);
1652 return ret;
1653
1654error:
1655 RELEASE_LOCK(self);
1656 Py_XDECREF(ret);
1657 return NULL;
1658}
1659
/* Method table of BZ2Compressor: compress(data) takes positional
 * arguments, flush() takes none. */
static PyMethodDef BZ2Comp_methods[] = {
    {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
     BZ2Comp_compress__doc__},
    {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
     BZ2Comp_flush__doc__},
    {NULL, NULL} /* sentinel */
};
1667
1668
1669/* ===================================================================== */
1670/* Slot definitions for BZ2Comp_Type. */
1671
1672static int
1673BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1674{
1675 int compresslevel = 9;
1676 int bzerror;
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001677 static const char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001678
1679 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1680 kwlist, &compresslevel))
1681 return -1;
1682
1683 if (compresslevel < 1 || compresslevel > 9) {
1684 PyErr_SetString(PyExc_ValueError,
1685 "compresslevel must be between 1 and 9");
1686 goto error;
1687 }
1688
1689#ifdef WITH_THREAD
1690 self->lock = PyThread_allocate_lock();
1691 if (!self->lock)
1692 goto error;
1693#endif
1694
1695 memset(&self->bzs, 0, sizeof(bz_stream));
1696 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1697 if (bzerror != BZ_OK) {
1698 Util_CatchBZ2Error(bzerror);
1699 goto error;
1700 }
1701
1702 self->running = 1;
1703
1704 return 0;
1705error:
1706#ifdef WITH_THREAD
1707 if (self->lock)
1708 PyThread_free_lock(self->lock);
1709#endif
1710 return -1;
1711}
1712
1713static void
1714BZ2Comp_dealloc(BZ2CompObject *self)
1715{
1716#ifdef WITH_THREAD
1717 if (self->lock)
1718 PyThread_free_lock(self->lock);
1719#endif
1720 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001721 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001722}
1723
1724
1725/* ===================================================================== */
1726/* BZ2Comp_Type definition. */
1727
/* Docstring of the BZ2Compressor type. */
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for bz2.BZ2Compressor.  Instances come from the generic
 * allocator/constructor; set-up is done by BZ2Comp_init and tear-down
 * by BZ2Comp_dealloc.  The type exposes methods only (no members or
 * getset entries) and may be subclassed (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2Comp_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2Compressor",    /*tp_name*/
    sizeof(BZ2CompObject),  /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Comp__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Comp_methods,        /*tp_methods*/
    0,                      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Comp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1780
1781
1782/* ===================================================================== */
1783/* Members of BZ2Decomp. */
1784
/* OFF(x) now yields the byte offset of field x inside BZ2DecompObject. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Struct fields exposed directly as attributes: "unused_data" is the
 * object stored in self->unused_data and is read-only (RO). */
static PyMemberDef BZ2Decomp_members[] = {
    {"unused_data", T_OBJECT, OFF(unused_data), RO},
    {NULL}      /* Sentinel */
};
1792
1793
1794/* ===================================================================== */
1795/* Methods of BZ2Decomp. */
1796
1797PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1798"decompress(data) -> string\n\
1799\n\
1800Provide more data to the decompressor object. It will return chunks\n\
1801of decompressed data whenever possible. If you try to decompress data\n\
1802after the end of stream is found, EOFError will be raised. If any data\n\
1803was found after the end of stream, it'll be ignored and saved in\n\
1804unused_data attribute.\n\
1805");
1806
1807static PyObject *
1808BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1809{
1810 char *data;
1811 int datasize;
1812 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001813 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001814 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001815 bz_stream *bzs = &self->bzs;
1816 int bzerror;
1817
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001818 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001819 return NULL;
1820
1821 ACQUIRE_LOCK(self);
1822 if (!self->running) {
1823 PyErr_SetString(PyExc_EOFError, "end of stream was "
1824 "already found");
1825 goto error;
1826 }
1827
1828 ret = PyString_FromStringAndSize(NULL, bufsize);
1829 if (!ret)
1830 goto error;
1831
1832 bzs->next_in = data;
1833 bzs->avail_in = datasize;
1834 bzs->next_out = BUF(ret);
1835 bzs->avail_out = bufsize;
1836
1837 totalout = BZS_TOTAL_OUT(bzs);
1838
1839 for (;;) {
1840 Py_BEGIN_ALLOW_THREADS
1841 bzerror = BZ2_bzDecompress(bzs);
1842 Py_END_ALLOW_THREADS
1843 if (bzerror == BZ_STREAM_END) {
1844 if (bzs->avail_in != 0) {
1845 Py_DECREF(self->unused_data);
1846 self->unused_data =
1847 PyString_FromStringAndSize(bzs->next_in,
1848 bzs->avail_in);
1849 }
1850 self->running = 0;
1851 break;
1852 }
1853 if (bzerror != BZ_OK) {
1854 Util_CatchBZ2Error(bzerror);
1855 goto error;
1856 }
1857 if (bzs->avail_out == 0) {
1858 bufsize = Util_NewBufferSize(bufsize);
1859 if (_PyString_Resize(&ret, bufsize) < 0) {
1860 BZ2_bzDecompressEnd(bzs);
1861 goto error;
1862 }
1863 bzs->next_out = BUF(ret);
1864 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1865 - totalout);
1866 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1867 } else if (bzs->avail_in == 0) {
1868 break;
1869 }
1870 }
1871
1872 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001873 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001874
1875 RELEASE_LOCK(self);
1876 return ret;
1877
1878error:
1879 RELEASE_LOCK(self);
1880 Py_XDECREF(ret);
1881 return NULL;
1882}
1883
/* Method table of BZ2Decompressor: decompress(data) is its only method. */
static PyMethodDef BZ2Decomp_methods[] = {
    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
    {NULL, NULL} /* sentinel */
};
1888
1889
1890/* ===================================================================== */
1891/* Slot definitions for BZ2Decomp_Type. */
1892
1893static int
1894BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1895{
1896 int bzerror;
1897
1898 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1899 return -1;
1900
1901#ifdef WITH_THREAD
1902 self->lock = PyThread_allocate_lock();
1903 if (!self->lock)
1904 goto error;
1905#endif
1906
1907 self->unused_data = PyString_FromString("");
1908 if (!self->unused_data)
1909 goto error;
1910
1911 memset(&self->bzs, 0, sizeof(bz_stream));
1912 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1913 if (bzerror != BZ_OK) {
1914 Util_CatchBZ2Error(bzerror);
1915 goto error;
1916 }
1917
1918 self->running = 1;
1919
1920 return 0;
1921
1922error:
1923#ifdef WITH_THREAD
1924 if (self->lock)
1925 PyThread_free_lock(self->lock);
1926#endif
1927 Py_XDECREF(self->unused_data);
1928 return -1;
1929}
1930
1931static void
1932BZ2Decomp_dealloc(BZ2DecompObject *self)
1933{
1934#ifdef WITH_THREAD
1935 if (self->lock)
1936 PyThread_free_lock(self->lock);
1937#endif
1938 Py_XDECREF(self->unused_data);
1939 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001940 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001941}
1942
1943
1944/* ===================================================================== */
1945/* BZ2Decomp_Type definition. */
1946
/* Docstring of the BZ2Decompressor type. */
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.  Instances come from the generic
 * allocator/constructor; set-up is done by BZ2Decomp_init and tear-down
 * by BZ2Decomp_dealloc.  The type exposes BZ2Decomp_methods and
 * BZ2Decomp_members and may be subclassed (Py_TPFLAGS_BASETYPE). */
static PyTypeObject BZ2Decomp_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /*ob_size*/
    "bz2.BZ2Decompressor",  /*tp_name*/
    sizeof(BZ2DecompObject), /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_compare*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Decomp__doc__,       /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Decomp_methods,      /*tp_methods*/
    BZ2Decomp_members,      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Decomp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1998
1999
2000/* ===================================================================== */
2001/* Module functions. */
2002
2003PyDoc_STRVAR(bz2_compress__doc__,
2004"compress(data [, compresslevel=9]) -> string\n\
2005\n\
2006Compress data in one shot. If you want to compress data sequentially,\n\
2007use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2008given, must be a number between 1 and 9.\n\
2009");
2010
2011static PyObject *
2012bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2013{
2014 int compresslevel=9;
2015 char *data;
2016 int datasize;
2017 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002018 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002019 bz_stream _bzs;
2020 bz_stream *bzs = &_bzs;
2021 int bzerror;
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00002022 static const char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002023
2024 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2025 kwlist, &data, &datasize,
2026 &compresslevel))
2027 return NULL;
2028
2029 if (compresslevel < 1 || compresslevel > 9) {
2030 PyErr_SetString(PyExc_ValueError,
2031 "compresslevel must be between 1 and 9");
2032 return NULL;
2033 }
2034
2035 /* Conforming to bz2 manual, this is large enough to fit compressed
2036 * data in one shot. We will check it later anyway. */
2037 bufsize = datasize + (datasize/100+1) + 600;
2038
2039 ret = PyString_FromStringAndSize(NULL, bufsize);
2040 if (!ret)
2041 return NULL;
2042
2043 memset(bzs, 0, sizeof(bz_stream));
2044
2045 bzs->next_in = data;
2046 bzs->avail_in = datasize;
2047 bzs->next_out = BUF(ret);
2048 bzs->avail_out = bufsize;
2049
2050 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2051 if (bzerror != BZ_OK) {
2052 Util_CatchBZ2Error(bzerror);
2053 Py_DECREF(ret);
2054 return NULL;
2055 }
Tim Peterse3228092002-11-09 04:21:44 +00002056
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002057 for (;;) {
2058 Py_BEGIN_ALLOW_THREADS
2059 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2060 Py_END_ALLOW_THREADS
2061 if (bzerror == BZ_STREAM_END) {
2062 break;
2063 } else if (bzerror != BZ_FINISH_OK) {
2064 BZ2_bzCompressEnd(bzs);
2065 Util_CatchBZ2Error(bzerror);
2066 Py_DECREF(ret);
2067 return NULL;
2068 }
2069 if (bzs->avail_out == 0) {
2070 bufsize = Util_NewBufferSize(bufsize);
2071 if (_PyString_Resize(&ret, bufsize) < 0) {
2072 BZ2_bzCompressEnd(bzs);
2073 Py_DECREF(ret);
2074 return NULL;
2075 }
2076 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2077 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2078 }
2079 }
2080
2081 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002082 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002083 BZ2_bzCompressEnd(bzs);
2084
2085 return ret;
2086}
2087
2088PyDoc_STRVAR(bz2_decompress__doc__,
2089"decompress(data) -> decompressed data\n\
2090\n\
2091Decompress data in one shot. If you want to decompress data sequentially,\n\
2092use an instance of BZ2Decompressor instead.\n\
2093");
2094
2095static PyObject *
2096bz2_decompress(PyObject *self, PyObject *args)
2097{
2098 char *data;
2099 int datasize;
2100 int bufsize = SMALLCHUNK;
2101 PyObject *ret;
2102 bz_stream _bzs;
2103 bz_stream *bzs = &_bzs;
2104 int bzerror;
2105
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002106 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002107 return NULL;
2108
2109 if (datasize == 0)
2110 return PyString_FromString("");
2111
2112 ret = PyString_FromStringAndSize(NULL, bufsize);
2113 if (!ret)
2114 return NULL;
2115
2116 memset(bzs, 0, sizeof(bz_stream));
2117
2118 bzs->next_in = data;
2119 bzs->avail_in = datasize;
2120 bzs->next_out = BUF(ret);
2121 bzs->avail_out = bufsize;
2122
2123 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2124 if (bzerror != BZ_OK) {
2125 Util_CatchBZ2Error(bzerror);
2126 Py_DECREF(ret);
2127 return NULL;
2128 }
Tim Peterse3228092002-11-09 04:21:44 +00002129
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002130 for (;;) {
2131 Py_BEGIN_ALLOW_THREADS
2132 bzerror = BZ2_bzDecompress(bzs);
2133 Py_END_ALLOW_THREADS
2134 if (bzerror == BZ_STREAM_END) {
2135 break;
2136 } else if (bzerror != BZ_OK) {
2137 BZ2_bzDecompressEnd(bzs);
2138 Util_CatchBZ2Error(bzerror);
2139 Py_DECREF(ret);
2140 return NULL;
2141 }
2142 if (bzs->avail_out == 0) {
2143 bufsize = Util_NewBufferSize(bufsize);
2144 if (_PyString_Resize(&ret, bufsize) < 0) {
2145 BZ2_bzDecompressEnd(bzs);
2146 Py_DECREF(ret);
2147 return NULL;
2148 }
2149 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2150 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2151 } else if (bzs->avail_in == 0) {
2152 BZ2_bzDecompressEnd(bzs);
2153 PyErr_SetString(PyExc_ValueError,
2154 "couldn't find end of stream");
2155 Py_DECREF(ret);
2156 return NULL;
2157 }
2158 }
2159
2160 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002161 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002162 BZ2_bzDecompressEnd(bzs);
2163
2164 return ret;
2165}
2166
2167static PyMethodDef bz2_methods[] = {
2168 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2169 bz2_compress__doc__},
2170 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2171 bz2_decompress__doc__},
2172 {NULL, NULL} /* sentinel */
2173};
2174
2175/* ===================================================================== */
2176/* Initialization function. */
2177
2178PyDoc_STRVAR(bz2__doc__,
2179"The python bz2 module provides a comprehensive interface for\n\
2180the bz2 compression library. It implements a complete file\n\
2181interface, one shot (de)compression functions, and types for\n\
2182sequential (de)compression.\n\
2183");
2184
Neal Norwitz21d896c2003-07-01 20:15:21 +00002185PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002186initbz2(void)
2187{
2188 PyObject *m;
2189
2190 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002191 BZ2Comp_Type.ob_type = &PyType_Type;
2192 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002193
2194 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2195
2196 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2197
2198 Py_INCREF(&BZ2File_Type);
2199 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2200
2201 Py_INCREF(&BZ2Comp_Type);
2202 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2203
2204 Py_INCREF(&BZ2Decomp_Type);
2205 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2206}