blob: b5542bf968d3430a241707586f92898f7f2de10a [file] [log] [blame]
/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* File-offset type used for positions and sizes: 64-bit when the platform
 * can provide it (same selection logic as Objects/fileobject.c).
 *
 *  - no large file support requested, or off_t already >= 8 bytes: off_t
 *  - otherwise, fpos_t if it is >= 8 bytes
 *  - otherwise the build is rejected
 */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character storage of a Python string object.  The argument is
 * parenthesized so that any pointer-valued expression may be passed. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0     /* reported as "I/O operation on closed file" */
#define MODE_READ     1     /* open for reading */
#define MODE_READ_EOF 2     /* reading, and BZ_STREAM_END has been seen */
#define MODE_WRITE    3     /* open for writing */

/* Exact type check (subclasses do not match). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZ_CONFIG_ERROR is used here as the marker for a bzlib that exports the
 * BZ2_-prefixed API and splits total_out into hi32/lo32 halves.  Without
 * it, the unprefixed names and the single total_out field are used.
 *
 * BZS_TOTAL_OUT(bzs) yields the total output byte count of the bz_stream
 * pointed to by bzs.  The argument is parenthesized so that expressions
 * such as &self->bzs can be passed.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Map the BZ2_-prefixed names used in this file onto the unprefixed API. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object lock helpers; both are used as statements: ACQUIRE_LOCK(self);
 *
 * ACQUIRE_LOCK must not block while holding the GIL.  The thread that owns
 * the object lock releases the GIL around bzlib I/O and has to re-acquire
 * it before it can release the object lock, so a waiter that blocks with
 * the GIL held would deadlock against it.  Hence: try a non-blocking
 * acquire first, and only if that fails wait with the GIL released.
 */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without thread support: locking is a no-op. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Bit flags accumulated in f_newlinetypes while reading with universal
 * newline translation; they can be OR-ed together. */
#define NEWLINE_UNKNOWN 0       /* no newline has been seen so far */
#define NEWLINE_CR      1       /* a lone \r was seen */
#define NEWLINE_LF      2       /* a lone \n was seen */
#define NEWLINE_CRLF    4       /* a \r\n pair was seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
105 int f_softspace; /* Flag used by 'print' command */
106
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000110
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000111 BZFILE *fp;
112 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000113 Py_off_t pos;
114 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000115#ifdef WITH_THREAD
116 PyThread_type_lock lock;
117#endif
118} BZ2FileObject;
119
120typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2CompObject;
128
129typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134#ifdef WITH_THREAD
135 PyThread_type_lock lock;
136#endif
137} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* Growth step for small buffers: BUFSIZ, but never less than 8 KiB. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Growth step for large buffers; kept small when int is narrow. */
#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* Adapted from Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to try after currentsize:
 *   - up to SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
226
227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000238 int bytes_read;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000239 int newlinetypes = f->f_newlinetypes;
240 int skipnextlf = f->f_skipnextlf;
241 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000242
243 total_v_size = n > 0 ? n : 100;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000244 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000245 if (v == NULL)
246 return NULL;
247
248 buf = BUF(v);
249 end = buf + total_v_size;
250
251 for (;;) {
252 Py_BEGIN_ALLOW_THREADS
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000253 while (buf != end) {
254 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
256 if (bytes_read == 0) break;
257 if (univ_newline) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000261 /* Seeing a \n here with skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262 * saw a \r before.
263 */
264 newlinetypes |= NEWLINE_CRLF;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000265 if (bzerror != BZ_OK) break;
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
267 f->pos++;
268 if (bytes_read == 0) break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000269 } else {
270 newlinetypes |= NEWLINE_CR;
271 }
272 }
273 if (c == '\r') {
274 skipnextlf = 1;
275 c = '\n';
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000276 } else if (c == '\n')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000277 newlinetypes |= NEWLINE_LF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000278 }
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000279 *buf++ = c;
280 if (bzerror != BZ_OK || c == '\n') break;
281 }
282 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000285 f->f_newlinetypes = newlinetypes;
286 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000287 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000288 f->size = f->pos;
289 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000290 break;
291 } else if (bzerror != BZ_OK) {
292 Util_CatchBZ2Error(bzerror);
293 Py_DECREF(v);
294 return NULL;
295 }
296 if (c == '\n')
297 break;
298 /* Must be because buf == end */
299 if (n > 0)
300 break;
301 used_v_size = total_v_size;
302 increment = total_v_size >> 2; /* mild exponential growth */
303 total_v_size += increment;
304 if (total_v_size > INT_MAX) {
305 PyErr_SetString(PyExc_OverflowError,
306 "line is longer than a Python string can hold");
307 Py_DECREF(v);
308 return NULL;
309 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000310 if (_PyString_Resize(&v, total_v_size) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000311 return NULL;
312 buf = BUF(v) + used_v_size;
313 end = BUF(v) + total_v_size;
314 }
315
316 used_v_size = buf - BUF(v);
317 if (used_v_size != total_v_size)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000318 _PyString_Resize(&v, used_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000319 return v;
320}
321
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000322/* This is a hacked version of Python's
323 * fileobject.c:Py_UniversalNewlineFread(). */
324size_t
325Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000326 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327{
328 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000329 int newlinetypes, skipnextlf;
330
331 assert(buf != NULL);
332 assert(stream != NULL);
333
334 if (!f->f_univ_newline)
335 return BZ2_bzRead(bzerror, stream, buf, n);
336
337 newlinetypes = f->f_newlinetypes;
338 skipnextlf = f->f_skipnextlf;
339
340 /* Invariant: n is the number of bytes remaining to be filled
341 * in the buffer.
342 */
343 while (n) {
344 size_t nread;
345 int shortread;
346 char *src = dst;
347
348 nread = BZ2_bzRead(bzerror, stream, dst, n);
349 assert(nread <= n);
350 n -= nread; /* assuming 1 byte out for each in; will adjust */
351 shortread = n != 0; /* true iff EOF or error */
352 while (nread--) {
353 char c = *src++;
354 if (c == '\r') {
355 /* Save as LF and set flag to skip next LF. */
356 *dst++ = '\n';
357 skipnextlf = 1;
358 }
359 else if (skipnextlf && c == '\n') {
360 /* Skip LF, and remember we saw CR LF. */
361 skipnextlf = 0;
362 newlinetypes |= NEWLINE_CRLF;
363 ++n;
364 }
365 else {
366 /* Normal char to be stored in buffer. Also
367 * update the newlinetypes flag if either this
368 * is an LF or the previous char was a CR.
369 */
370 if (c == '\n')
371 newlinetypes |= NEWLINE_LF;
372 else if (skipnextlf)
373 newlinetypes |= NEWLINE_CR;
374 *dst++ = c;
375 skipnextlf = 0;
376 }
377 }
378 if (shortread) {
379 /* If this is EOF, update type flags. */
380 if (skipnextlf && *bzerror == BZ_STREAM_END)
381 newlinetypes |= NEWLINE_CR;
382 break;
383 }
384 }
385 f->f_newlinetypes = newlinetypes;
386 f->f_skipnextlf = skipnextlf;
387 return dst - buf;
388}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000389
390/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
391static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000392Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000393{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000394 if (f->f_buf != NULL) {
395 PyMem_Free(f->f_buf);
396 f->f_buf = NULL;
397 }
398}
399
400/* This is a hacked version of Python's fileobject.c:readahead(). */
401static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000402Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403{
404 int chunksize;
405 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000406
407 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000408 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409 return 0;
410 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000411 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000413 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000414 f->f_bufptr = f->f_buf;
415 f->f_bufend = f->f_buf;
416 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000417 }
418 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
Antoine Pitrou016b3662008-08-09 17:22:25 +0000419 PyErr_NoMemory();
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000420 return -1;
421 }
422 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
424 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000425 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000426 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 f->size = f->pos;
429 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 } else if (bzerror != BZ_OK) {
431 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000432 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000433 return -1;
434 }
435 f->f_bufptr = f->f_buf;
436 f->f_bufend = f->f_buf + chunksize;
437 return 0;
438}
439
440/* This is a hacked version of Python's
441 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000442static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000443Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000444{
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000445 PyStringObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000446 char *bufptr;
447 char *buf;
448 int len;
449
450 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000451 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000452 return NULL;
453
454 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000455 if (len == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000456 return (PyStringObject *)
457 PyString_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000458 bufptr = memchr(f->f_bufptr, '\n', len);
459 if (bufptr != NULL) {
460 bufptr++; /* Count the '\n' */
461 len = bufptr - f->f_bufptr;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000462 s = (PyStringObject *)
463 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000464 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000465 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000466 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 f->f_bufptr = bufptr;
468 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000469 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 } else {
471 bufptr = f->f_bufptr;
472 buf = f->f_buf;
473 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 s = Util_ReadAheadGetLineSkip(f, skip+len,
475 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000476 if (s == NULL) {
477 PyMem_Free(buf);
478 return NULL;
479 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000480 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 PyMem_Free(buf);
482 }
483 return s;
484}
485
486/* ===================================================================== */
487/* Methods of BZ2File. */
488
489PyDoc_STRVAR(BZ2File_read__doc__,
490"read([size]) -> string\n\
491\n\
492Read at most size uncompressed bytes, returned as a string. If the size\n\
493argument is negative or omitted, read until EOF is reached.\n\
494");
495
496/* This is a hacked version of Python's fileobject.c:file_read(). */
497static PyObject *
498BZ2File_read(BZ2FileObject *self, PyObject *args)
499{
500 long bytesrequested = -1;
501 size_t bytesread, buffersize, chunksize;
502 int bzerror;
503 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000504
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000505 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
506 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000507
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000508 ACQUIRE_LOCK(self);
509 switch (self->mode) {
510 case MODE_READ:
511 break;
512 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000513 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000514 goto cleanup;
515 case MODE_CLOSED:
516 PyErr_SetString(PyExc_ValueError,
517 "I/O operation on closed file");
518 goto cleanup;
519 default:
520 PyErr_SetString(PyExc_IOError,
521 "file is not ready for reading");
522 goto cleanup;
523 }
524
525 if (bytesrequested < 0)
526 buffersize = Util_NewBufferSize((size_t)0);
527 else
528 buffersize = bytesrequested;
529 if (buffersize > INT_MAX) {
530 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000531 "requested number of bytes is "
532 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000533 goto cleanup;
534 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000535 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000536 if (ret == NULL)
537 goto cleanup;
538 bytesread = 0;
539
540 for (;;) {
541 Py_BEGIN_ALLOW_THREADS
542 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
543 BUF(ret)+bytesread,
544 buffersize-bytesread,
545 self);
546 self->pos += chunksize;
547 Py_END_ALLOW_THREADS
548 bytesread += chunksize;
549 if (bzerror == BZ_STREAM_END) {
550 self->size = self->pos;
551 self->mode = MODE_READ_EOF;
552 break;
553 } else if (bzerror != BZ_OK) {
554 Util_CatchBZ2Error(bzerror);
555 Py_DECREF(ret);
556 ret = NULL;
557 goto cleanup;
558 }
559 if (bytesrequested < 0) {
560 buffersize = Util_NewBufferSize(buffersize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000561 if (_PyString_Resize(&ret, buffersize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000562 goto cleanup;
563 } else {
564 break;
565 }
566 }
567 if (bytesread != buffersize)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000568 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000569
570cleanup:
571 RELEASE_LOCK(self);
572 return ret;
573}
574
575PyDoc_STRVAR(BZ2File_readline__doc__,
576"readline([size]) -> string\n\
577\n\
578Return the next line from the file, as a string, retaining newline.\n\
579A non-negative size argument will limit the maximum number of bytes to\n\
580return (an incomplete line may be returned then). Return an empty\n\
581string at EOF.\n\
582");
583
584static PyObject *
585BZ2File_readline(BZ2FileObject *self, PyObject *args)
586{
587 PyObject *ret = NULL;
588 int sizehint = -1;
589
590 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
591 return NULL;
592
593 ACQUIRE_LOCK(self);
594 switch (self->mode) {
595 case MODE_READ:
596 break;
597 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000598 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000599 goto cleanup;
600 case MODE_CLOSED:
601 PyErr_SetString(PyExc_ValueError,
602 "I/O operation on closed file");
603 goto cleanup;
604 default:
605 PyErr_SetString(PyExc_IOError,
606 "file is not ready for reading");
607 goto cleanup;
608 }
609
610 if (sizehint == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000611 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000612 else
613 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
614
615cleanup:
616 RELEASE_LOCK(self);
617 return ret;
618}
619
620PyDoc_STRVAR(BZ2File_readlines__doc__,
621"readlines([size]) -> list\n\
622\n\
623Call readline() repeatedly and return a list of lines read.\n\
624The optional size argument, if given, is an approximate bound on the\n\
625total number of bytes in the lines returned.\n\
626");
627
628/* This is a hacked version of Python's fileobject.c:file_readlines(). */
629static PyObject *
630BZ2File_readlines(BZ2FileObject *self, PyObject *args)
631{
632 long sizehint = 0;
633 PyObject *list = NULL;
634 PyObject *line;
635 char small_buffer[SMALLCHUNK];
636 char *buffer = small_buffer;
637 size_t buffersize = SMALLCHUNK;
638 PyObject *big_buffer = NULL;
639 size_t nfilled = 0;
640 size_t nread;
641 size_t totalread = 0;
642 char *p, *q, *end;
643 int err;
644 int shortread = 0;
645 int bzerror;
646
647 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
648 return NULL;
649
650 ACQUIRE_LOCK(self);
651 switch (self->mode) {
652 case MODE_READ:
653 break;
654 case MODE_READ_EOF:
655 list = PyList_New(0);
656 goto cleanup;
657 case MODE_CLOSED:
658 PyErr_SetString(PyExc_ValueError,
659 "I/O operation on closed file");
660 goto cleanup;
661 default:
662 PyErr_SetString(PyExc_IOError,
663 "file is not ready for reading");
664 goto cleanup;
665 }
666
667 if ((list = PyList_New(0)) == NULL)
668 goto cleanup;
669
670 for (;;) {
671 Py_BEGIN_ALLOW_THREADS
672 nread = Util_UnivNewlineRead(&bzerror, self->fp,
673 buffer+nfilled,
674 buffersize-nfilled, self);
675 self->pos += nread;
676 Py_END_ALLOW_THREADS
677 if (bzerror == BZ_STREAM_END) {
678 self->size = self->pos;
679 self->mode = MODE_READ_EOF;
680 if (nread == 0) {
681 sizehint = 0;
682 break;
683 }
684 shortread = 1;
685 } else if (bzerror != BZ_OK) {
686 Util_CatchBZ2Error(bzerror);
687 error:
688 Py_DECREF(list);
689 list = NULL;
690 goto cleanup;
691 }
692 totalread += nread;
693 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000694 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695 /* Need a larger buffer to fit this line */
696 nfilled += nread;
697 buffersize *= 2;
698 if (buffersize > INT_MAX) {
699 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000700 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000701 goto error;
702 }
703 if (big_buffer == NULL) {
704 /* Create the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000705 big_buffer = PyString_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 NULL, buffersize);
707 if (big_buffer == NULL)
708 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000709 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000710 memcpy(buffer, small_buffer, nfilled);
711 }
712 else {
713 /* Grow the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000714 _PyString_Resize(&big_buffer, buffersize);
715 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000716 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000717 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000718 }
719 end = buffer+nfilled+nread;
720 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000721 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000722 /* Process complete lines */
723 p++;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000724 line = PyString_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000725 if (line == NULL)
726 goto error;
727 err = PyList_Append(list, line);
728 Py_DECREF(line);
729 if (err != 0)
730 goto error;
731 q = p;
732 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000733 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000734 /* Move the remaining incomplete line to the start */
735 nfilled = end-q;
736 memmove(buffer, q, nfilled);
737 if (sizehint > 0)
738 if (totalread >= (size_t)sizehint)
739 break;
740 if (shortread) {
741 sizehint = 0;
742 break;
743 }
744 }
745 if (nfilled != 0) {
746 /* Partial last line */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000747 line = PyString_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000748 if (line == NULL)
749 goto error;
750 if (sizehint > 0) {
751 /* Need to complete the last line */
752 PyObject *rest = Util_GetLine(self, 0);
753 if (rest == NULL) {
754 Py_DECREF(line);
755 goto error;
756 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000757 PyString_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000758 Py_DECREF(rest);
759 if (line == NULL)
760 goto error;
761 }
762 err = PyList_Append(list, line);
763 Py_DECREF(line);
764 if (err != 0)
765 goto error;
766 }
767
768 cleanup:
769 RELEASE_LOCK(self);
770 if (big_buffer) {
771 Py_DECREF(big_buffer);
772 }
773 return list;
774}
775
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000776PyDoc_STRVAR(BZ2File_xreadlines__doc__,
777"xreadlines() -> self\n\
778\n\
779For backward compatibility. BZ2File objects now include the performance\n\
780optimizations previously implemented in the xreadlines module.\n\
781");
782
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000783PyDoc_STRVAR(BZ2File_write__doc__,
784"write(data) -> None\n\
785\n\
786Write the 'data' string to file. Note that due to buffering, close() may\n\
787be needed before the file on disk reflects the data written.\n\
788");
789
790/* This is a hacked version of Python's fileobject.c:file_write(). */
791static PyObject *
792BZ2File_write(BZ2FileObject *self, PyObject *args)
793{
794 PyObject *ret = NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000795 Py_buffer pbuf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000796 char *buf;
797 int len;
798 int bzerror;
799
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000800 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000801 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000802 buf = pbuf.buf;
803 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000804
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 ACQUIRE_LOCK(self);
806 switch (self->mode) {
807 case MODE_WRITE:
808 break;
Tim Peterse3228092002-11-09 04:21:44 +0000809
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810 case MODE_CLOSED:
811 PyErr_SetString(PyExc_ValueError,
812 "I/O operation on closed file");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000813 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000814
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000815 default:
816 PyErr_SetString(PyExc_IOError,
817 "file is not ready for writing");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000818 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000819 }
820
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000821 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000822
823 Py_BEGIN_ALLOW_THREADS
824 BZ2_bzWrite (&bzerror, self->fp, buf, len);
825 self->pos += len;
826 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000827
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000828 if (bzerror != BZ_OK) {
829 Util_CatchBZ2Error(bzerror);
830 goto cleanup;
831 }
Tim Peterse3228092002-11-09 04:21:44 +0000832
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000833 Py_INCREF(Py_None);
834 ret = Py_None;
835
836cleanup:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000837 PyBuffer_Release(&pbuf);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000838 RELEASE_LOCK(self);
839 return ret;
840}
841
842PyDoc_STRVAR(BZ2File_writelines__doc__,
843"writelines(sequence_of_strings) -> None\n\
844\n\
845Write the sequence of strings to the file. Note that newlines are not\n\
846added. The sequence can be any iterable object producing strings. This is\n\
847equivalent to calling write() for each string.\n\
848");
849
850/* This is a hacked version of Python's fileobject.c:file_writelines(). */
851static PyObject *
852BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
853{
854#define CHUNKSIZE 1000
855 PyObject *list = NULL;
856 PyObject *iter = NULL;
857 PyObject *ret = NULL;
858 PyObject *line;
859 int i, j, index, len, islist;
860 int bzerror;
861
862 ACQUIRE_LOCK(self);
Georg Brandl3335a7a2006-08-14 21:42:55 +0000863 switch (self->mode) {
864 case MODE_WRITE:
865 break;
866
867 case MODE_CLOSED:
868 PyErr_SetString(PyExc_ValueError,
869 "I/O operation on closed file");
870 goto error;
871
872 default:
873 PyErr_SetString(PyExc_IOError,
874 "file is not ready for writing");
875 goto error;
876 }
877
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000878 islist = PyList_Check(seq);
879 if (!islist) {
880 iter = PyObject_GetIter(seq);
881 if (iter == NULL) {
882 PyErr_SetString(PyExc_TypeError,
883 "writelines() requires an iterable argument");
884 goto error;
885 }
886 list = PyList_New(CHUNKSIZE);
887 if (list == NULL)
888 goto error;
889 }
890
891 /* Strategy: slurp CHUNKSIZE lines into a private list,
892 checking that they are all strings, then write that list
893 without holding the interpreter lock, then come back for more. */
894 for (index = 0; ; index += CHUNKSIZE) {
895 if (islist) {
896 Py_XDECREF(list);
897 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
898 if (list == NULL)
899 goto error;
900 j = PyList_GET_SIZE(list);
901 }
902 else {
903 for (j = 0; j < CHUNKSIZE; j++) {
904 line = PyIter_Next(iter);
905 if (line == NULL) {
906 if (PyErr_Occurred())
907 goto error;
908 break;
909 }
910 PyList_SetItem(list, j, line);
911 }
912 }
913 if (j == 0)
914 break;
915
916 /* Check that all entries are indeed strings. If not,
917 apply the same rules as for file.write() and
918 convert the rets to strings. This is slow, but
919 seems to be the only way since all conversion APIs
920 could potentially execute Python code. */
921 for (i = 0; i < j; i++) {
922 PyObject *v = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000923 if (!PyString_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000924 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000925 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000926 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
927 PyErr_SetString(PyExc_TypeError,
928 "writelines() "
929 "argument must be "
930 "a sequence of "
931 "strings");
932 goto error;
933 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000934 line = PyString_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000935 len);
936 if (line == NULL)
937 goto error;
938 Py_DECREF(v);
939 PyList_SET_ITEM(list, i, line);
940 }
941 }
942
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000943 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000944
945 /* Since we are releasing the global lock, the
946 following code may *not* execute Python code. */
947 Py_BEGIN_ALLOW_THREADS
948 for (i = 0; i < j; i++) {
949 line = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000950 len = PyString_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000951 BZ2_bzWrite (&bzerror, self->fp,
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000952 PyString_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000953 if (bzerror != BZ_OK) {
954 Py_BLOCK_THREADS
955 Util_CatchBZ2Error(bzerror);
956 goto error;
957 }
958 }
959 Py_END_ALLOW_THREADS
960
961 if (j < CHUNKSIZE)
962 break;
963 }
964
965 Py_INCREF(Py_None);
966 ret = Py_None;
967
968 error:
969 RELEASE_LOCK(self);
970 Py_XDECREF(list);
971 Py_XDECREF(iter);
972 return ret;
973#undef CHUNKSIZE
974}
975
976PyDoc_STRVAR(BZ2File_seek__doc__,
977"seek(offset [, whence]) -> None\n\
978\n\
979Move to new file position. Argument offset is a byte count. Optional\n\
980argument whence defaults to 0 (offset from start of file, offset\n\
981should be >= 0); other values are 1 (move relative to current position,\n\
982positive or negative), and 2 (move relative to end of file, usually\n\
983negative, although many platforms allow seeking beyond the end of a file).\n\
984\n\
985Note that seeking of bz2 files is emulated, and depending on the parameters\n\
986the operation may be extremely slow.\n\
987");
988
989static PyObject *
990BZ2File_seek(BZ2FileObject *self, PyObject *args)
991{
992 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000993 PyObject *offobj;
994 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000995 char small_buffer[SMALLCHUNK];
996 char *buffer = small_buffer;
997 size_t buffersize = SMALLCHUNK;
Andrew M. Kuchling44b054b2006-12-18 19:22:24 +0000998 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000999 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 int chunksize;
1001 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001002 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001003
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001004 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1005 return NULL;
1006#if !defined(HAVE_LARGEFILE_SUPPORT)
1007 offset = PyInt_AsLong(offobj);
1008#else
1009 offset = PyLong_Check(offobj) ?
1010 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1011#endif
1012 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001013 return NULL;
1014
1015 ACQUIRE_LOCK(self);
1016 Util_DropReadAhead(self);
1017 switch (self->mode) {
1018 case MODE_READ:
1019 case MODE_READ_EOF:
1020 break;
Tim Peterse3228092002-11-09 04:21:44 +00001021
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001022 case MODE_CLOSED:
1023 PyErr_SetString(PyExc_ValueError,
1024 "I/O operation on closed file");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001025 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001026
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001027 default:
1028 PyErr_SetString(PyExc_IOError,
1029 "seek works only while reading");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001030 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001031 }
1032
Georg Brandl47fab922006-02-18 21:57:25 +00001033 if (where == 2) {
1034 if (self->size == -1) {
1035 assert(self->mode != MODE_READ_EOF);
1036 for (;;) {
1037 Py_BEGIN_ALLOW_THREADS
1038 chunksize = Util_UnivNewlineRead(
1039 &bzerror, self->fp,
1040 buffer, buffersize,
1041 self);
1042 self->pos += chunksize;
1043 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001044
Georg Brandl47fab922006-02-18 21:57:25 +00001045 bytesread += chunksize;
1046 if (bzerror == BZ_STREAM_END) {
1047 break;
1048 } else if (bzerror != BZ_OK) {
1049 Util_CatchBZ2Error(bzerror);
1050 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001051 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001052 }
Georg Brandl47fab922006-02-18 21:57:25 +00001053 self->mode = MODE_READ_EOF;
1054 self->size = self->pos;
1055 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 }
Georg Brandl47fab922006-02-18 21:57:25 +00001057 offset = self->size + offset;
1058 } else if (where == 1) {
1059 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001060 }
1061
Georg Brandl47fab922006-02-18 21:57:25 +00001062 /* Before getting here, offset must be the absolute position the file
1063 * pointer should be set to. */
1064
1065 if (offset >= self->pos) {
1066 /* we can move forward */
1067 offset -= self->pos;
1068 } else {
1069 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 BZ2_bzReadClose(&bzerror, self->fp);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001071 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001072 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001073 self->fp = NULL;
1074 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001075 if (bzerror != BZ_OK) {
1076 Util_CatchBZ2Error(bzerror);
1077 goto cleanup;
1078 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001079 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001080 if (!ret)
1081 goto cleanup;
1082 Py_DECREF(ret);
1083 ret = NULL;
1084 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001085 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001086 0, 0, NULL, 0);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001087 if (self->fp)
Gregory P. Smith73bee442008-04-12 20:37:48 +00001088 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001089 if (bzerror != BZ_OK) {
1090 Util_CatchBZ2Error(bzerror);
1091 goto cleanup;
1092 }
1093 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001094 }
1095
Georg Brandl47fab922006-02-18 21:57:25 +00001096 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001097 goto exit;
1098
1099 /* Before getting here, offset must be set to the number of bytes
1100 * to walk forward. */
1101 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001102 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001103 readsize = buffersize;
1104 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001105 /* offset might be wider that readsize, but the result
1106 * of the subtraction is bound by buffersize (see the
1107 * condition above). buffersize is 8192. */
1108 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001109 Py_BEGIN_ALLOW_THREADS
1110 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1111 buffer, readsize, self);
1112 self->pos += chunksize;
1113 Py_END_ALLOW_THREADS
1114 bytesread += chunksize;
1115 if (bzerror == BZ_STREAM_END) {
1116 self->size = self->pos;
1117 self->mode = MODE_READ_EOF;
1118 break;
1119 } else if (bzerror != BZ_OK) {
1120 Util_CatchBZ2Error(bzerror);
1121 goto cleanup;
1122 }
1123 if (bytesread == offset)
1124 break;
1125 }
1126
1127exit:
1128 Py_INCREF(Py_None);
1129 ret = Py_None;
1130
1131cleanup:
1132 RELEASE_LOCK(self);
1133 return ret;
1134}
1135
1136PyDoc_STRVAR(BZ2File_tell__doc__,
1137"tell() -> int\n\
1138\n\
1139Return the current file position, an integer (may be a long integer).\n\
1140");
1141
1142static PyObject *
1143BZ2File_tell(BZ2FileObject *self, PyObject *args)
1144{
1145 PyObject *ret = NULL;
1146
1147 if (self->mode == MODE_CLOSED) {
1148 PyErr_SetString(PyExc_ValueError,
1149 "I/O operation on closed file");
1150 goto cleanup;
1151 }
1152
Georg Brandla8bcecc2005-09-03 07:49:53 +00001153#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001155#else
1156 ret = PyLong_FromLongLong(self->pos);
1157#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001158
1159cleanup:
1160 return ret;
1161}
1162
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163PyDoc_STRVAR(BZ2File_close__doc__,
1164"close() -> None or (perhaps) an integer\n\
1165\n\
1166Close the file. Sets data attribute .closed to true. A closed file\n\
1167cannot be used for further I/O operations. close() may be called more\n\
1168than once without error.\n\
1169");
1170
1171static PyObject *
1172BZ2File_close(BZ2FileObject *self)
1173{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001174 PyObject *ret = NULL;
1175 int bzerror = BZ_OK;
1176
1177 ACQUIRE_LOCK(self);
1178 switch (self->mode) {
1179 case MODE_READ:
1180 case MODE_READ_EOF:
1181 BZ2_bzReadClose(&bzerror, self->fp);
1182 break;
1183 case MODE_WRITE:
1184 BZ2_bzWriteClose(&bzerror, self->fp,
1185 0, NULL, NULL);
1186 break;
1187 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001188 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001189 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001190 self->fp = NULL;
1191 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001192 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001193 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001194 if (bzerror != BZ_OK) {
1195 Util_CatchBZ2Error(bzerror);
1196 Py_XDECREF(ret);
1197 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001198 }
1199
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001200 RELEASE_LOCK(self);
1201 return ret;
1202}
1203
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001204static PyObject *BZ2File_getiter(BZ2FileObject *self);
1205
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001206static PyMethodDef BZ2File_methods[] = {
1207 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1208 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1209 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001210 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001211 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1212 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1213 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1214 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001215 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1216 {NULL, NULL} /* sentinel */
1217};
1218
1219
1220/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001221/* Getters and setters of BZ2File. */
1222
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001223/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1224static PyObject *
1225BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1226{
1227 switch (self->f_newlinetypes) {
1228 case NEWLINE_UNKNOWN:
1229 Py_INCREF(Py_None);
1230 return Py_None;
1231 case NEWLINE_CR:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001232 return PyString_FromString("\r");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001233 case NEWLINE_LF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001234 return PyString_FromString("\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001235 case NEWLINE_CR|NEWLINE_LF:
1236 return Py_BuildValue("(ss)", "\r", "\n");
1237 case NEWLINE_CRLF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001238 return PyString_FromString("\r\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001239 case NEWLINE_CR|NEWLINE_CRLF:
1240 return Py_BuildValue("(ss)", "\r", "\r\n");
1241 case NEWLINE_LF|NEWLINE_CRLF:
1242 return Py_BuildValue("(ss)", "\n", "\r\n");
1243 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1244 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1245 default:
1246 PyErr_Format(PyExc_SystemError,
1247 "Unknown newlines value 0x%x\n",
1248 self->f_newlinetypes);
1249 return NULL;
1250 }
1251}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001252
1253static PyObject *
1254BZ2File_get_closed(BZ2FileObject *self, void *closure)
1255{
1256 return PyInt_FromLong(self->mode == MODE_CLOSED);
1257}
1258
1259static PyObject *
1260BZ2File_get_mode(BZ2FileObject *self, void *closure)
1261{
1262 return PyObject_GetAttrString(self->file, "mode");
1263}
1264
1265static PyObject *
1266BZ2File_get_name(BZ2FileObject *self, void *closure)
1267{
1268 return PyObject_GetAttrString(self->file, "name");
1269}
1270
1271static PyGetSetDef BZ2File_getset[] = {
1272 {"closed", (getter)BZ2File_get_closed, NULL,
1273 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001274 {"newlines", (getter)BZ2File_get_newlines, NULL,
1275 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001276 {"mode", (getter)BZ2File_get_mode, NULL,
1277 "file mode ('r', 'w', or 'U')"},
1278 {"name", (getter)BZ2File_get_name, NULL,
1279 "file name"},
1280 {NULL} /* Sentinel */
1281};
1282
1283
1284/* ===================================================================== */
1285/* Members of BZ2File_Type. */
1286
1287#undef OFF
1288#define OFF(x) offsetof(BZ2FileObject, x)
1289
1290static PyMemberDef BZ2File_members[] = {
1291 {"softspace", T_INT, OFF(f_softspace), 0,
1292 "flag indicating that a space needs to be printed; used by print"},
1293 {NULL} /* Sentinel */
1294};
1295
1296/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001297/* Slot definitions for BZ2File_Type. */
1298
1299static int
1300BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1301{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001302 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001303 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001304 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001305 char *mode = "r";
1306 int buffering = -1;
1307 int compresslevel = 9;
1308 int bzerror;
1309 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001310
1311 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001312
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001313 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1314 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001315 &compresslevel))
1316 return -1;
1317
1318 if (compresslevel < 1 || compresslevel > 9) {
1319 PyErr_SetString(PyExc_ValueError,
1320 "compresslevel must be between 1 and 9");
1321 return -1;
1322 }
1323
1324 for (;;) {
1325 int error = 0;
1326 switch (*mode) {
1327 case 'r':
1328 case 'w':
1329 if (mode_char)
1330 error = 1;
1331 mode_char = *mode;
1332 break;
1333
1334 case 'b':
1335 break;
1336
1337 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001338#ifdef __VMS
1339 self->f_univ_newline = 0;
1340#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001341 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001342#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001343 break;
1344
1345 default:
1346 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001347 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001348 }
1349 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001350 PyErr_Format(PyExc_ValueError,
1351 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001352 return -1;
1353 }
1354 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001355 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001356 break;
1357 }
1358
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001359 if (mode_char == 0) {
1360 mode_char = 'r';
1361 }
1362
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001363 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001364
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001365 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1366 name, mode, buffering);
1367 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001368 return -1;
1369
1370 /* From now on, we have stuff to dealloc, so jump to error label
1371 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001372
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001373#ifdef WITH_THREAD
1374 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001375 if (!self->lock) {
1376 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001377 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001378 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001379#endif
1380
1381 if (mode_char == 'r')
1382 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001383 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001384 0, 0, NULL, 0);
1385 else
1386 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001387 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001388 compresslevel, 0, 0);
1389
1390 if (bzerror != BZ_OK) {
1391 Util_CatchBZ2Error(bzerror);
1392 goto error;
1393 }
Gregory P. Smith73bee442008-04-12 20:37:48 +00001394 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001395
1396 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1397
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001398 return 0;
1399
1400error:
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001401 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001402#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001403 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001404 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001405 self->lock = NULL;
1406 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001407#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001408 return -1;
1409}
1410
1411static void
1412BZ2File_dealloc(BZ2FileObject *self)
1413{
1414 int bzerror;
1415#ifdef WITH_THREAD
1416 if (self->lock)
1417 PyThread_free_lock(self->lock);
1418#endif
1419 switch (self->mode) {
1420 case MODE_READ:
1421 case MODE_READ_EOF:
1422 BZ2_bzReadClose(&bzerror, self->fp);
1423 break;
1424 case MODE_WRITE:
1425 BZ2_bzWriteClose(&bzerror, self->fp,
1426 0, NULL, NULL);
1427 break;
1428 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001429 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001430 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001431 self->fp = NULL;
1432 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001433 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001434 Py_XDECREF(self->file);
Christian Heimese93237d2007-12-19 02:37:44 +00001435 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001436}
1437
1438/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1439static PyObject *
1440BZ2File_getiter(BZ2FileObject *self)
1441{
1442 if (self->mode == MODE_CLOSED) {
1443 PyErr_SetString(PyExc_ValueError,
1444 "I/O operation on closed file");
1445 return NULL;
1446 }
1447 Py_INCREF((PyObject*)self);
1448 return (PyObject *)self;
1449}
1450
1451/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1452#define READAHEAD_BUFSIZE 8192
1453static PyObject *
1454BZ2File_iternext(BZ2FileObject *self)
1455{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001456 PyStringObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001457 ACQUIRE_LOCK(self);
1458 if (self->mode == MODE_CLOSED) {
Gregory P. Smith3b1e6b22008-07-07 04:31:58 +00001459 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001460 PyErr_SetString(PyExc_ValueError,
1461 "I/O operation on closed file");
1462 return NULL;
1463 }
1464 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1465 RELEASE_LOCK(self);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001466 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001467 Py_XDECREF(ret);
1468 return NULL;
1469 }
1470 return (PyObject *)ret;
1471}
1472
1473/* ===================================================================== */
1474/* BZ2File_Type definition. */
1475
1476PyDoc_VAR(BZ2File__doc__) =
1477PyDoc_STR(
1478"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1479\n\
1480Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1481writing. When opened for writing, the file will be created if it doesn't\n\
1482exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1483unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1484is given, must be a number between 1 and 9.\n\
1485")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001486PyDoc_STR(
1487"\n\
1488Add a 'U' to mode to open the file for input with universal newline\n\
1489support. Any line ending in the input file will be seen as a '\\n' in\n\
1490Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1491for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1492'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1493newlines are available only when reading.\n\
1494")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001495;
1496
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001497static PyTypeObject BZ2File_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001498 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001499 "bz2.BZ2File", /*tp_name*/
1500 sizeof(BZ2FileObject), /*tp_basicsize*/
1501 0, /*tp_itemsize*/
1502 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1503 0, /*tp_print*/
1504 0, /*tp_getattr*/
1505 0, /*tp_setattr*/
1506 0, /*tp_compare*/
1507 0, /*tp_repr*/
1508 0, /*tp_as_number*/
1509 0, /*tp_as_sequence*/
1510 0, /*tp_as_mapping*/
1511 0, /*tp_hash*/
1512 0, /*tp_call*/
1513 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001514 PyObject_GenericGetAttr,/*tp_getattro*/
1515 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001516 0, /*tp_as_buffer*/
1517 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1518 BZ2File__doc__, /*tp_doc*/
1519 0, /*tp_traverse*/
1520 0, /*tp_clear*/
1521 0, /*tp_richcompare*/
1522 0, /*tp_weaklistoffset*/
1523 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1524 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1525 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001526 BZ2File_members, /*tp_members*/
1527 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001528 0, /*tp_base*/
1529 0, /*tp_dict*/
1530 0, /*tp_descr_get*/
1531 0, /*tp_descr_set*/
1532 0, /*tp_dictoffset*/
1533 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001534 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001535 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001536 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001537 0, /*tp_is_gc*/
1538};
1539
1540
1541/* ===================================================================== */
1542/* Methods of BZ2Comp. */
1543
1544PyDoc_STRVAR(BZ2Comp_compress__doc__,
1545"compress(data) -> string\n\
1546\n\
1547Provide more data to the compressor object. It will return chunks of\n\
1548compressed data whenever possible. When you've finished providing data\n\
1549to compress, call the flush() method to finish the compression process,\n\
1550and return what is left in the internal buffers.\n\
1551");
1552
1553static PyObject *
1554BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1555{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001556 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001557 char *data;
1558 int datasize;
1559 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001560 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001561 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001562 bz_stream *bzs = &self->bzs;
1563 int bzerror;
1564
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001565 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001566 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001567 data = pdata.buf;
1568 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001569
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001570 if (datasize == 0) {
1571 PyBuffer_Release(&pdata);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001572 return PyString_FromString("");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001573 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001574
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001575 ACQUIRE_LOCK(self);
1576 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001577 PyErr_SetString(PyExc_ValueError,
1578 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001579 goto error;
1580 }
1581
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001582 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001583 if (!ret)
1584 goto error;
1585
1586 bzs->next_in = data;
1587 bzs->avail_in = datasize;
1588 bzs->next_out = BUF(ret);
1589 bzs->avail_out = bufsize;
1590
1591 totalout = BZS_TOTAL_OUT(bzs);
1592
1593 for (;;) {
1594 Py_BEGIN_ALLOW_THREADS
1595 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1596 Py_END_ALLOW_THREADS
1597 if (bzerror != BZ_RUN_OK) {
1598 Util_CatchBZ2Error(bzerror);
1599 goto error;
1600 }
Georg Brandla47337f2007-03-13 12:34:25 +00001601 if (bzs->avail_in == 0)
1602 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001603 if (bzs->avail_out == 0) {
1604 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001605 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001606 BZ2_bzCompressEnd(bzs);
1607 goto error;
1608 }
1609 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1610 - totalout);
1611 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001612 }
1613 }
1614
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001615 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001616
1617 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001618 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001619 return ret;
1620
1621error:
1622 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001623 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001624 Py_XDECREF(ret);
1625 return NULL;
1626}
1627
1628PyDoc_STRVAR(BZ2Comp_flush__doc__,
1629"flush() -> string\n\
1630\n\
1631Finish the compression process and return what is left in internal buffers.\n\
1632You must not use the compressor object after calling this method.\n\
1633");
1634
1635static PyObject *
1636BZ2Comp_flush(BZ2CompObject *self)
1637{
1638 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001639 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001640 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001641 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001642 int bzerror;
1643
1644 ACQUIRE_LOCK(self);
1645 if (!self->running) {
1646 PyErr_SetString(PyExc_ValueError, "object was already "
1647 "flushed");
1648 goto error;
1649 }
1650 self->running = 0;
1651
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001652 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001653 if (!ret)
1654 goto error;
1655
1656 bzs->next_out = BUF(ret);
1657 bzs->avail_out = bufsize;
1658
1659 totalout = BZS_TOTAL_OUT(bzs);
1660
1661 for (;;) {
1662 Py_BEGIN_ALLOW_THREADS
1663 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1664 Py_END_ALLOW_THREADS
1665 if (bzerror == BZ_STREAM_END) {
1666 break;
1667 } else if (bzerror != BZ_FINISH_OK) {
1668 Util_CatchBZ2Error(bzerror);
1669 goto error;
1670 }
1671 if (bzs->avail_out == 0) {
1672 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001673 if (_PyString_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001674 goto error;
1675 bzs->next_out = BUF(ret);
1676 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1677 - totalout);
1678 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1679 }
1680 }
1681
1682 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001683 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001684
1685 RELEASE_LOCK(self);
1686 return ret;
1687
1688error:
1689 RELEASE_LOCK(self);
1690 Py_XDECREF(ret);
1691 return NULL;
1692}
1693
1694static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001695 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1696 BZ2Comp_compress__doc__},
1697 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1698 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001699 {NULL, NULL} /* sentinel */
1700};
1701
1702
1703/* ===================================================================== */
1704/* Slot definitions for BZ2Comp_Type. */
1705
1706static int
1707BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1708{
1709 int compresslevel = 9;
1710 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001711 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001712
1713 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1714 kwlist, &compresslevel))
1715 return -1;
1716
1717 if (compresslevel < 1 || compresslevel > 9) {
1718 PyErr_SetString(PyExc_ValueError,
1719 "compresslevel must be between 1 and 9");
1720 goto error;
1721 }
1722
1723#ifdef WITH_THREAD
1724 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001725 if (!self->lock) {
1726 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001727 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001728 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001729#endif
1730
1731 memset(&self->bzs, 0, sizeof(bz_stream));
1732 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1733 if (bzerror != BZ_OK) {
1734 Util_CatchBZ2Error(bzerror);
1735 goto error;
1736 }
1737
1738 self->running = 1;
1739
1740 return 0;
1741error:
1742#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001743 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001744 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001745 self->lock = NULL;
1746 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001747#endif
1748 return -1;
1749}
1750
1751static void
1752BZ2Comp_dealloc(BZ2CompObject *self)
1753{
1754#ifdef WITH_THREAD
1755 if (self->lock)
1756 PyThread_free_lock(self->lock);
1757#endif
1758 BZ2_bzCompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001759 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001760}
1761
1762
/* ===================================================================== */
/* BZ2Comp_Type definition. */
1765
1766PyDoc_STRVAR(BZ2Comp__doc__,
1767"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1768\n\
1769Create a new compressor object. This object may be used to compress\n\
1770data sequentially. If you want to compress data in one shot, use the\n\
1771compress() function instead. The compresslevel parameter, if given,\n\
1772must be a number between 1 and 9.\n\
1773");
1774
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001775static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001776 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001777 "bz2.BZ2Compressor", /*tp_name*/
1778 sizeof(BZ2CompObject), /*tp_basicsize*/
1779 0, /*tp_itemsize*/
1780 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1781 0, /*tp_print*/
1782 0, /*tp_getattr*/
1783 0, /*tp_setattr*/
1784 0, /*tp_compare*/
1785 0, /*tp_repr*/
1786 0, /*tp_as_number*/
1787 0, /*tp_as_sequence*/
1788 0, /*tp_as_mapping*/
1789 0, /*tp_hash*/
1790 0, /*tp_call*/
1791 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001792 PyObject_GenericGetAttr,/*tp_getattro*/
1793 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001794 0, /*tp_as_buffer*/
1795 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1796 BZ2Comp__doc__, /*tp_doc*/
1797 0, /*tp_traverse*/
1798 0, /*tp_clear*/
1799 0, /*tp_richcompare*/
1800 0, /*tp_weaklistoffset*/
1801 0, /*tp_iter*/
1802 0, /*tp_iternext*/
1803 BZ2Comp_methods, /*tp_methods*/
1804 0, /*tp_members*/
1805 0, /*tp_getset*/
1806 0, /*tp_base*/
1807 0, /*tp_dict*/
1808 0, /*tp_descr_get*/
1809 0, /*tp_descr_set*/
1810 0, /*tp_dictoffset*/
1811 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001812 PyType_GenericAlloc, /*tp_alloc*/
1813 PyType_GenericNew, /*tp_new*/
1814 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001815 0, /*tp_is_gc*/
1816};
1817
1818
/* ===================================================================== */
/* Members of BZ2Decomp. */
1821
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001822#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001823#define OFF(x) offsetof(BZ2DecompObject, x)
1824
1825static PyMemberDef BZ2Decomp_members[] = {
1826 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1827 {NULL} /* Sentinel */
1828};
1829
1830
/* ===================================================================== */
/* Methods of BZ2Decomp. */
1833
1834PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1835"decompress(data) -> string\n\
1836\n\
1837Provide more data to the decompressor object. It will return chunks\n\
1838of decompressed data whenever possible. If you try to decompress data\n\
1839after the end of stream is found, EOFError will be raised. If any data\n\
1840was found after the end of stream, it'll be ignored and saved in\n\
1841unused_data attribute.\n\
1842");
1843
1844static PyObject *
1845BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1846{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001847 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001848 char *data;
1849 int datasize;
1850 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001851 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001852 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001853 bz_stream *bzs = &self->bzs;
1854 int bzerror;
1855
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001856 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001857 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001858 data = pdata.buf;
1859 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001860
1861 ACQUIRE_LOCK(self);
1862 if (!self->running) {
1863 PyErr_SetString(PyExc_EOFError, "end of stream was "
1864 "already found");
1865 goto error;
1866 }
1867
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001868 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001869 if (!ret)
1870 goto error;
1871
1872 bzs->next_in = data;
1873 bzs->avail_in = datasize;
1874 bzs->next_out = BUF(ret);
1875 bzs->avail_out = bufsize;
1876
1877 totalout = BZS_TOTAL_OUT(bzs);
1878
1879 for (;;) {
1880 Py_BEGIN_ALLOW_THREADS
1881 bzerror = BZ2_bzDecompress(bzs);
1882 Py_END_ALLOW_THREADS
1883 if (bzerror == BZ_STREAM_END) {
1884 if (bzs->avail_in != 0) {
1885 Py_DECREF(self->unused_data);
1886 self->unused_data =
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001887 PyString_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001888 bzs->avail_in);
1889 }
1890 self->running = 0;
1891 break;
1892 }
1893 if (bzerror != BZ_OK) {
1894 Util_CatchBZ2Error(bzerror);
1895 goto error;
1896 }
Georg Brandla47337f2007-03-13 12:34:25 +00001897 if (bzs->avail_in == 0)
1898 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001899 if (bzs->avail_out == 0) {
1900 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001901 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001902 BZ2_bzDecompressEnd(bzs);
1903 goto error;
1904 }
1905 bzs->next_out = BUF(ret);
1906 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1907 - totalout);
1908 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001909 }
1910 }
1911
1912 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001913 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001914
1915 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001916 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001917 return ret;
1918
1919error:
1920 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001921 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001922 Py_XDECREF(ret);
1923 return NULL;
1924}
1925
1926static PyMethodDef BZ2Decomp_methods[] = {
1927 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1928 {NULL, NULL} /* sentinel */
1929};
1930
1931
/* ===================================================================== */
/* Slot definitions for BZ2Decomp_Type. */
1934
1935static int
1936BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1937{
1938 int bzerror;
1939
1940 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1941 return -1;
1942
1943#ifdef WITH_THREAD
1944 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001945 if (!self->lock) {
1946 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001947 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001948 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001949#endif
1950
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001951 self->unused_data = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001952 if (!self->unused_data)
1953 goto error;
1954
1955 memset(&self->bzs, 0, sizeof(bz_stream));
1956 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1957 if (bzerror != BZ_OK) {
1958 Util_CatchBZ2Error(bzerror);
1959 goto error;
1960 }
1961
1962 self->running = 1;
1963
1964 return 0;
1965
1966error:
1967#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001968 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001969 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001970 self->lock = NULL;
1971 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001972#endif
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001973 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001974 return -1;
1975}
1976
1977static void
1978BZ2Decomp_dealloc(BZ2DecompObject *self)
1979{
1980#ifdef WITH_THREAD
1981 if (self->lock)
1982 PyThread_free_lock(self->lock);
1983#endif
1984 Py_XDECREF(self->unused_data);
1985 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001986 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001987}
1988
1989
/* ===================================================================== */
/* BZ2Decomp_Type definition. */
1992
1993PyDoc_STRVAR(BZ2Decomp__doc__,
1994"BZ2Decompressor() -> decompressor object\n\
1995\n\
1996Create a new decompressor object. This object may be used to decompress\n\
1997data sequentially. If you want to decompress data in one shot, use the\n\
1998decompress() function instead.\n\
1999");
2000
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00002001static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00002002 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002003 "bz2.BZ2Decompressor", /*tp_name*/
2004 sizeof(BZ2DecompObject), /*tp_basicsize*/
2005 0, /*tp_itemsize*/
2006 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2007 0, /*tp_print*/
2008 0, /*tp_getattr*/
2009 0, /*tp_setattr*/
2010 0, /*tp_compare*/
2011 0, /*tp_repr*/
2012 0, /*tp_as_number*/
2013 0, /*tp_as_sequence*/
2014 0, /*tp_as_mapping*/
2015 0, /*tp_hash*/
2016 0, /*tp_call*/
2017 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002018 PyObject_GenericGetAttr,/*tp_getattro*/
2019 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002020 0, /*tp_as_buffer*/
2021 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2022 BZ2Decomp__doc__, /*tp_doc*/
2023 0, /*tp_traverse*/
2024 0, /*tp_clear*/
2025 0, /*tp_richcompare*/
2026 0, /*tp_weaklistoffset*/
2027 0, /*tp_iter*/
2028 0, /*tp_iternext*/
2029 BZ2Decomp_methods, /*tp_methods*/
2030 BZ2Decomp_members, /*tp_members*/
2031 0, /*tp_getset*/
2032 0, /*tp_base*/
2033 0, /*tp_dict*/
2034 0, /*tp_descr_get*/
2035 0, /*tp_descr_set*/
2036 0, /*tp_dictoffset*/
2037 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002038 PyType_GenericAlloc, /*tp_alloc*/
2039 PyType_GenericNew, /*tp_new*/
2040 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002041 0, /*tp_is_gc*/
2042};
2043
2044
/* ===================================================================== */
/* Module functions. */
2047
2048PyDoc_STRVAR(bz2_compress__doc__,
2049"compress(data [, compresslevel=9]) -> string\n\
2050\n\
2051Compress data in one shot. If you want to compress data sequentially,\n\
2052use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2053given, must be a number between 1 and 9.\n\
2054");
2055
2056static PyObject *
2057bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2058{
2059 int compresslevel=9;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002060 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002061 char *data;
2062 int datasize;
2063 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002064 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002065 bz_stream _bzs;
2066 bz_stream *bzs = &_bzs;
2067 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002068 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002069
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002070 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2071 kwlist, &pdata,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002072 &compresslevel))
2073 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002074 data = pdata.buf;
2075 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002076
2077 if (compresslevel < 1 || compresslevel > 9) {
2078 PyErr_SetString(PyExc_ValueError,
2079 "compresslevel must be between 1 and 9");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002080 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002081 return NULL;
2082 }
2083
2084 /* Conforming to bz2 manual, this is large enough to fit compressed
2085 * data in one shot. We will check it later anyway. */
2086 bufsize = datasize + (datasize/100+1) + 600;
2087
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002088 ret = PyString_FromStringAndSize(NULL, bufsize);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002089 if (!ret) {
2090 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002091 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002092 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002093
2094 memset(bzs, 0, sizeof(bz_stream));
2095
2096 bzs->next_in = data;
2097 bzs->avail_in = datasize;
2098 bzs->next_out = BUF(ret);
2099 bzs->avail_out = bufsize;
2100
2101 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2102 if (bzerror != BZ_OK) {
2103 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002104 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002105 Py_DECREF(ret);
2106 return NULL;
2107 }
Tim Peterse3228092002-11-09 04:21:44 +00002108
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002109 for (;;) {
2110 Py_BEGIN_ALLOW_THREADS
2111 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2112 Py_END_ALLOW_THREADS
2113 if (bzerror == BZ_STREAM_END) {
2114 break;
2115 } else if (bzerror != BZ_FINISH_OK) {
2116 BZ2_bzCompressEnd(bzs);
2117 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002118 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002119 Py_DECREF(ret);
2120 return NULL;
2121 }
2122 if (bzs->avail_out == 0) {
2123 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002124 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002125 BZ2_bzCompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002126 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002127 Py_DECREF(ret);
2128 return NULL;
2129 }
2130 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2131 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2132 }
2133 }
2134
2135 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002136 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002137 BZ2_bzCompressEnd(bzs);
2138
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002139 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002140 return ret;
2141}
2142
2143PyDoc_STRVAR(bz2_decompress__doc__,
2144"decompress(data) -> decompressed data\n\
2145\n\
2146Decompress data in one shot. If you want to decompress data sequentially,\n\
2147use an instance of BZ2Decompressor instead.\n\
2148");
2149
2150static PyObject *
2151bz2_decompress(PyObject *self, PyObject *args)
2152{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002153 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002154 char *data;
2155 int datasize;
2156 int bufsize = SMALLCHUNK;
2157 PyObject *ret;
2158 bz_stream _bzs;
2159 bz_stream *bzs = &_bzs;
2160 int bzerror;
2161
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002162 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002163 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002164 data = pdata.buf;
2165 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002166
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002167 if (datasize == 0) {
2168 PyBuffer_Release(&pdata);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002169 return PyString_FromString("");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002170 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002171
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002172 ret = PyString_FromStringAndSize(NULL, bufsize);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002173 if (!ret) {
2174 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002175 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002176 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002177
2178 memset(bzs, 0, sizeof(bz_stream));
2179
2180 bzs->next_in = data;
2181 bzs->avail_in = datasize;
2182 bzs->next_out = BUF(ret);
2183 bzs->avail_out = bufsize;
2184
2185 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2186 if (bzerror != BZ_OK) {
2187 Util_CatchBZ2Error(bzerror);
2188 Py_DECREF(ret);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002189 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002190 return NULL;
2191 }
Tim Peterse3228092002-11-09 04:21:44 +00002192
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002193 for (;;) {
2194 Py_BEGIN_ALLOW_THREADS
2195 bzerror = BZ2_bzDecompress(bzs);
2196 Py_END_ALLOW_THREADS
2197 if (bzerror == BZ_STREAM_END) {
2198 break;
2199 } else if (bzerror != BZ_OK) {
2200 BZ2_bzDecompressEnd(bzs);
2201 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002202 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002203 Py_DECREF(ret);
2204 return NULL;
2205 }
Georg Brandla47337f2007-03-13 12:34:25 +00002206 if (bzs->avail_in == 0) {
2207 BZ2_bzDecompressEnd(bzs);
2208 PyErr_SetString(PyExc_ValueError,
2209 "couldn't find end of stream");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002210 PyBuffer_Release(&pdata);
Georg Brandla47337f2007-03-13 12:34:25 +00002211 Py_DECREF(ret);
2212 return NULL;
2213 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002214 if (bzs->avail_out == 0) {
2215 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002216 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002217 BZ2_bzDecompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002218 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002219 Py_DECREF(ret);
2220 return NULL;
2221 }
2222 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2223 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002224 }
2225 }
2226
2227 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002228 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002229 BZ2_bzDecompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002230 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002231
2232 return ret;
2233}
2234
2235static PyMethodDef bz2_methods[] = {
2236 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2237 bz2_compress__doc__},
2238 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2239 bz2_decompress__doc__},
2240 {NULL, NULL} /* sentinel */
2241};
2242
/* ===================================================================== */
/* Initialization function. */
2245
2246PyDoc_STRVAR(bz2__doc__,
2247"The python bz2 module provides a comprehensive interface for\n\
2248the bz2 compression library. It implements a complete file\n\
2249interface, one shot (de)compression functions, and types for\n\
2250sequential (de)compression.\n\
2251");
2252
Neal Norwitz21d896c2003-07-01 20:15:21 +00002253PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002254initbz2(void)
2255{
2256 PyObject *m;
2257
Christian Heimese93237d2007-12-19 02:37:44 +00002258 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2259 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2260 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002261
2262 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002263 if (m == NULL)
2264 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002265
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002266 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002267
2268 Py_INCREF(&BZ2File_Type);
2269 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2270
2271 Py_INCREF(&BZ2Comp_Type);
2272 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2273
2274 Py_INCREF(&BZ2Decomp_Type);
2275 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2276}