/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/

Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Credits text for the module. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
    Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";

/* Our very own off_t-like type, 64-bit if possible */
/* copied from Objects/fileobject.c */
#if !defined(HAVE_LARGEFILE_SUPPORT)
/* No large file support configured: plain off_t is all we have. */
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif

/* Raw character buffer of a bytes object (no type checking). */
#define BUF(v) PyBytes_AS_STRING((PyBytesObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type check against BZ2File_Type (pointer comparison only). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)

/* BZ_CONFIG_ERROR is used here as the marker for a bzlib that exposes the
 * BZ2_-prefixed API and split 32-bit total_out counters.  When it is
 * missing, the prefixed names are mapped onto the unprefixed ones. */
#ifdef BZ_CONFIG_ERROR

/* Total output byte count, combined from the hi/lo halves when a 64-bit
 * integer type is available; otherwise only the low 32 bits are used. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */


/* Per-object locking.  Both macros expand to nothing when the interpreter
 * is built without thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR      1       /* \r newline seen */
#define NEWLINE_LF      2       /* \n newline seen */
#define NEWLINE_CRLF    4       /* \r\n newline seen */

94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
105 int f_softspace; /* Flag used by 'print' command */
106
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000110
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000111 BZFILE *fp;
112 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000113 Py_off_t pos;
114 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000115#ifdef WITH_THREAD
116 PyThread_type_lock lock;
117#endif
118} BZ2FileObject;
119
120typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2CompObject;
128
129typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134#ifdef WITH_THREAD
135 PyThread_type_lock lock;
136#endif
137} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* Smallest growth step for read buffers: at least 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Size at which buffer growth switches from doubling to linear. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to try after `currentsize`:
 *   - up to SMALLCHUNK:  grow by SMALLCHUNK;
 *   - up to BIGCHUNK:    double;
 *   - beyond BIGCHUNK:   grow by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize <= SMALLCHUNK)
        return currentsize + SMALLCHUNK;
    if (currentsize <= BIGCHUNK)
        return currentsize + currentsize;
    return currentsize + BIGCHUNK;
}

227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000238 int bytes_read;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000239 int newlinetypes = f->f_newlinetypes;
240 int skipnextlf = f->f_skipnextlf;
241 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000242
243 total_v_size = n > 0 ? n : 100;
Christian Heimes593daf52008-05-26 12:51:38 +0000244 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000245 if (v == NULL)
246 return NULL;
247
248 buf = BUF(v);
249 end = buf + total_v_size;
250
251 for (;;) {
252 Py_BEGIN_ALLOW_THREADS
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000253 while (buf != end) {
254 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
256 if (bytes_read == 0) break;
257 if (univ_newline) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000261 /* Seeing a \n here with skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262 * saw a \r before.
263 */
264 newlinetypes |= NEWLINE_CRLF;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000265 if (bzerror != BZ_OK) break;
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
267 f->pos++;
268 if (bytes_read == 0) break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000269 } else {
270 newlinetypes |= NEWLINE_CR;
271 }
272 }
273 if (c == '\r') {
274 skipnextlf = 1;
275 c = '\n';
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000276 } else if (c == '\n')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000277 newlinetypes |= NEWLINE_LF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000278 }
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000279 *buf++ = c;
280 if (bzerror != BZ_OK || c == '\n') break;
281 }
282 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000285 f->f_newlinetypes = newlinetypes;
286 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000287 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000288 f->size = f->pos;
289 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000290 break;
291 } else if (bzerror != BZ_OK) {
292 Util_CatchBZ2Error(bzerror);
293 Py_DECREF(v);
294 return NULL;
295 }
296 if (c == '\n')
297 break;
298 /* Must be because buf == end */
299 if (n > 0)
300 break;
301 used_v_size = total_v_size;
302 increment = total_v_size >> 2; /* mild exponential growth */
303 total_v_size += increment;
304 if (total_v_size > INT_MAX) {
305 PyErr_SetString(PyExc_OverflowError,
306 "line is longer than a Python string can hold");
307 Py_DECREF(v);
308 return NULL;
309 }
Christian Heimes593daf52008-05-26 12:51:38 +0000310 if (_PyBytes_Resize(&v, total_v_size) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000311 return NULL;
312 buf = BUF(v) + used_v_size;
313 end = BUF(v) + total_v_size;
314 }
315
316 used_v_size = buf - BUF(v);
317 if (used_v_size != total_v_size)
Christian Heimes593daf52008-05-26 12:51:38 +0000318 _PyBytes_Resize(&v, used_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000319 return v;
320}
321
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000322/* This is a hacked version of Python's
323 * fileobject.c:Py_UniversalNewlineFread(). */
324size_t
325Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000326 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327{
328 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000329 int newlinetypes, skipnextlf;
330
331 assert(buf != NULL);
332 assert(stream != NULL);
333
334 if (!f->f_univ_newline)
335 return BZ2_bzRead(bzerror, stream, buf, n);
336
337 newlinetypes = f->f_newlinetypes;
338 skipnextlf = f->f_skipnextlf;
339
340 /* Invariant: n is the number of bytes remaining to be filled
341 * in the buffer.
342 */
343 while (n) {
344 size_t nread;
345 int shortread;
346 char *src = dst;
347
348 nread = BZ2_bzRead(bzerror, stream, dst, n);
349 assert(nread <= n);
350 n -= nread; /* assuming 1 byte out for each in; will adjust */
351 shortread = n != 0; /* true iff EOF or error */
352 while (nread--) {
353 char c = *src++;
354 if (c == '\r') {
355 /* Save as LF and set flag to skip next LF. */
356 *dst++ = '\n';
357 skipnextlf = 1;
358 }
359 else if (skipnextlf && c == '\n') {
360 /* Skip LF, and remember we saw CR LF. */
361 skipnextlf = 0;
362 newlinetypes |= NEWLINE_CRLF;
363 ++n;
364 }
365 else {
366 /* Normal char to be stored in buffer. Also
367 * update the newlinetypes flag if either this
368 * is an LF or the previous char was a CR.
369 */
370 if (c == '\n')
371 newlinetypes |= NEWLINE_LF;
372 else if (skipnextlf)
373 newlinetypes |= NEWLINE_CR;
374 *dst++ = c;
375 skipnextlf = 0;
376 }
377 }
378 if (shortread) {
379 /* If this is EOF, update type flags. */
380 if (skipnextlf && *bzerror == BZ_STREAM_END)
381 newlinetypes |= NEWLINE_CR;
382 break;
383 }
384 }
385 f->f_newlinetypes = newlinetypes;
386 f->f_skipnextlf = skipnextlf;
387 return dst - buf;
388}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000389
390/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
391static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000392Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000393{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000394 if (f->f_buf != NULL) {
395 PyMem_Free(f->f_buf);
396 f->f_buf = NULL;
397 }
398}
399
400/* This is a hacked version of Python's fileobject.c:readahead(). */
401static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000402Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403{
404 int chunksize;
405 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000406
407 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000408 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409 return 0;
410 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000411 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000413 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000414 f->f_bufptr = f->f_buf;
415 f->f_bufend = f->f_buf;
416 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000417 }
418 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
419 return -1;
420 }
421 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000422 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
423 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000424 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000425 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000426 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000427 f->size = f->pos;
428 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000429 } else if (bzerror != BZ_OK) {
430 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 return -1;
433 }
434 f->f_bufptr = f->f_buf;
435 f->f_bufend = f->f_buf + chunksize;
436 return 0;
437}
438
439/* This is a hacked version of Python's
440 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes593daf52008-05-26 12:51:38 +0000441static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000442Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000443{
Christian Heimes593daf52008-05-26 12:51:38 +0000444 PyBytesObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000445 char *bufptr;
446 char *buf;
447 int len;
448
449 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000450 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000451 return NULL;
452
453 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000454 if (len == 0)
Christian Heimes593daf52008-05-26 12:51:38 +0000455 return (PyBytesObject *)
456 PyBytes_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 bufptr = memchr(f->f_bufptr, '\n', len);
458 if (bufptr != NULL) {
459 bufptr++; /* Count the '\n' */
460 len = bufptr - f->f_bufptr;
Christian Heimes593daf52008-05-26 12:51:38 +0000461 s = (PyBytesObject *)
462 PyBytes_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000463 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000464 return NULL;
Christian Heimes593daf52008-05-26 12:51:38 +0000465 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000466 f->f_bufptr = bufptr;
467 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000468 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000469 } else {
470 bufptr = f->f_bufptr;
471 buf = f->f_buf;
472 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000473 s = Util_ReadAheadGetLineSkip(f, skip+len,
474 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 if (s == NULL) {
476 PyMem_Free(buf);
477 return NULL;
478 }
Christian Heimes593daf52008-05-26 12:51:38 +0000479 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000480 PyMem_Free(buf);
481 }
482 return s;
483}
484
485/* ===================================================================== */
486/* Methods of BZ2File. */
487
488PyDoc_STRVAR(BZ2File_read__doc__,
489"read([size]) -> string\n\
490\n\
491Read at most size uncompressed bytes, returned as a string. If the size\n\
492argument is negative or omitted, read until EOF is reached.\n\
493");
494
495/* This is a hacked version of Python's fileobject.c:file_read(). */
496static PyObject *
497BZ2File_read(BZ2FileObject *self, PyObject *args)
498{
499 long bytesrequested = -1;
500 size_t bytesread, buffersize, chunksize;
501 int bzerror;
502 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000503
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000504 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
505 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000506
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000507 ACQUIRE_LOCK(self);
508 switch (self->mode) {
509 case MODE_READ:
510 break;
511 case MODE_READ_EOF:
Christian Heimes593daf52008-05-26 12:51:38 +0000512 ret = PyBytes_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 goto cleanup;
514 case MODE_CLOSED:
515 PyErr_SetString(PyExc_ValueError,
516 "I/O operation on closed file");
517 goto cleanup;
518 default:
519 PyErr_SetString(PyExc_IOError,
520 "file is not ready for reading");
521 goto cleanup;
522 }
523
524 if (bytesrequested < 0)
525 buffersize = Util_NewBufferSize((size_t)0);
526 else
527 buffersize = bytesrequested;
528 if (buffersize > INT_MAX) {
529 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000530 "requested number of bytes is "
531 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000532 goto cleanup;
533 }
Christian Heimes593daf52008-05-26 12:51:38 +0000534 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000535 if (ret == NULL)
536 goto cleanup;
537 bytesread = 0;
538
539 for (;;) {
540 Py_BEGIN_ALLOW_THREADS
541 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
542 BUF(ret)+bytesread,
543 buffersize-bytesread,
544 self);
545 self->pos += chunksize;
546 Py_END_ALLOW_THREADS
547 bytesread += chunksize;
548 if (bzerror == BZ_STREAM_END) {
549 self->size = self->pos;
550 self->mode = MODE_READ_EOF;
551 break;
552 } else if (bzerror != BZ_OK) {
553 Util_CatchBZ2Error(bzerror);
554 Py_DECREF(ret);
555 ret = NULL;
556 goto cleanup;
557 }
558 if (bytesrequested < 0) {
559 buffersize = Util_NewBufferSize(buffersize);
Christian Heimes593daf52008-05-26 12:51:38 +0000560 if (_PyBytes_Resize(&ret, buffersize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000561 goto cleanup;
562 } else {
563 break;
564 }
565 }
566 if (bytesread != buffersize)
Christian Heimes593daf52008-05-26 12:51:38 +0000567 _PyBytes_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000568
569cleanup:
570 RELEASE_LOCK(self);
571 return ret;
572}
573
574PyDoc_STRVAR(BZ2File_readline__doc__,
575"readline([size]) -> string\n\
576\n\
577Return the next line from the file, as a string, retaining newline.\n\
578A non-negative size argument will limit the maximum number of bytes to\n\
579return (an incomplete line may be returned then). Return an empty\n\
580string at EOF.\n\
581");
582
583static PyObject *
584BZ2File_readline(BZ2FileObject *self, PyObject *args)
585{
586 PyObject *ret = NULL;
587 int sizehint = -1;
588
589 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
590 return NULL;
591
592 ACQUIRE_LOCK(self);
593 switch (self->mode) {
594 case MODE_READ:
595 break;
596 case MODE_READ_EOF:
Christian Heimes593daf52008-05-26 12:51:38 +0000597 ret = PyBytes_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000598 goto cleanup;
599 case MODE_CLOSED:
600 PyErr_SetString(PyExc_ValueError,
601 "I/O operation on closed file");
602 goto cleanup;
603 default:
604 PyErr_SetString(PyExc_IOError,
605 "file is not ready for reading");
606 goto cleanup;
607 }
608
609 if (sizehint == 0)
Christian Heimes593daf52008-05-26 12:51:38 +0000610 ret = PyBytes_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000611 else
612 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
613
614cleanup:
615 RELEASE_LOCK(self);
616 return ret;
617}
618
619PyDoc_STRVAR(BZ2File_readlines__doc__,
620"readlines([size]) -> list\n\
621\n\
622Call readline() repeatedly and return a list of lines read.\n\
623The optional size argument, if given, is an approximate bound on the\n\
624total number of bytes in the lines returned.\n\
625");
626
627/* This is a hacked version of Python's fileobject.c:file_readlines(). */
628static PyObject *
629BZ2File_readlines(BZ2FileObject *self, PyObject *args)
630{
631 long sizehint = 0;
632 PyObject *list = NULL;
633 PyObject *line;
634 char small_buffer[SMALLCHUNK];
635 char *buffer = small_buffer;
636 size_t buffersize = SMALLCHUNK;
637 PyObject *big_buffer = NULL;
638 size_t nfilled = 0;
639 size_t nread;
640 size_t totalread = 0;
641 char *p, *q, *end;
642 int err;
643 int shortread = 0;
644 int bzerror;
645
646 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
647 return NULL;
648
649 ACQUIRE_LOCK(self);
650 switch (self->mode) {
651 case MODE_READ:
652 break;
653 case MODE_READ_EOF:
654 list = PyList_New(0);
655 goto cleanup;
656 case MODE_CLOSED:
657 PyErr_SetString(PyExc_ValueError,
658 "I/O operation on closed file");
659 goto cleanup;
660 default:
661 PyErr_SetString(PyExc_IOError,
662 "file is not ready for reading");
663 goto cleanup;
664 }
665
666 if ((list = PyList_New(0)) == NULL)
667 goto cleanup;
668
669 for (;;) {
670 Py_BEGIN_ALLOW_THREADS
671 nread = Util_UnivNewlineRead(&bzerror, self->fp,
672 buffer+nfilled,
673 buffersize-nfilled, self);
674 self->pos += nread;
675 Py_END_ALLOW_THREADS
676 if (bzerror == BZ_STREAM_END) {
677 self->size = self->pos;
678 self->mode = MODE_READ_EOF;
679 if (nread == 0) {
680 sizehint = 0;
681 break;
682 }
683 shortread = 1;
684 } else if (bzerror != BZ_OK) {
685 Util_CatchBZ2Error(bzerror);
686 error:
687 Py_DECREF(list);
688 list = NULL;
689 goto cleanup;
690 }
691 totalread += nread;
692 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000693 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000694 /* Need a larger buffer to fit this line */
695 nfilled += nread;
696 buffersize *= 2;
697 if (buffersize > INT_MAX) {
698 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000699 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 goto error;
701 }
702 if (big_buffer == NULL) {
703 /* Create the big buffer */
Christian Heimes593daf52008-05-26 12:51:38 +0000704 big_buffer = PyBytes_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000705 NULL, buffersize);
706 if (big_buffer == NULL)
707 goto error;
Christian Heimes593daf52008-05-26 12:51:38 +0000708 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000709 memcpy(buffer, small_buffer, nfilled);
710 }
711 else {
712 /* Grow the big buffer */
Christian Heimes593daf52008-05-26 12:51:38 +0000713 _PyBytes_Resize(&big_buffer, buffersize);
714 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000715 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000716 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000717 }
718 end = buffer+nfilled+nread;
719 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000720 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000721 /* Process complete lines */
722 p++;
Christian Heimes593daf52008-05-26 12:51:38 +0000723 line = PyBytes_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000724 if (line == NULL)
725 goto error;
726 err = PyList_Append(list, line);
727 Py_DECREF(line);
728 if (err != 0)
729 goto error;
730 q = p;
731 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000732 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000733 /* Move the remaining incomplete line to the start */
734 nfilled = end-q;
735 memmove(buffer, q, nfilled);
736 if (sizehint > 0)
737 if (totalread >= (size_t)sizehint)
738 break;
739 if (shortread) {
740 sizehint = 0;
741 break;
742 }
743 }
744 if (nfilled != 0) {
745 /* Partial last line */
Christian Heimes593daf52008-05-26 12:51:38 +0000746 line = PyBytes_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000747 if (line == NULL)
748 goto error;
749 if (sizehint > 0) {
750 /* Need to complete the last line */
751 PyObject *rest = Util_GetLine(self, 0);
752 if (rest == NULL) {
753 Py_DECREF(line);
754 goto error;
755 }
Christian Heimes593daf52008-05-26 12:51:38 +0000756 PyBytes_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000757 Py_DECREF(rest);
758 if (line == NULL)
759 goto error;
760 }
761 err = PyList_Append(list, line);
762 Py_DECREF(line);
763 if (err != 0)
764 goto error;
765 }
766
767 cleanup:
768 RELEASE_LOCK(self);
769 if (big_buffer) {
770 Py_DECREF(big_buffer);
771 }
772 return list;
773}
774
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000775PyDoc_STRVAR(BZ2File_xreadlines__doc__,
776"xreadlines() -> self\n\
777\n\
778For backward compatibility. BZ2File objects now include the performance\n\
779optimizations previously implemented in the xreadlines module.\n\
780");
781
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000782PyDoc_STRVAR(BZ2File_write__doc__,
783"write(data) -> None\n\
784\n\
785Write the 'data' string to file. Note that due to buffering, close() may\n\
786be needed before the file on disk reflects the data written.\n\
787");
788
789/* This is a hacked version of Python's fileobject.c:file_write(). */
790static PyObject *
791BZ2File_write(BZ2FileObject *self, PyObject *args)
792{
793 PyObject *ret = NULL;
794 char *buf;
795 int len;
796 int bzerror;
797
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000798 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000799 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000800
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000801 ACQUIRE_LOCK(self);
802 switch (self->mode) {
803 case MODE_WRITE:
804 break;
Tim Peterse3228092002-11-09 04:21:44 +0000805
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000806 case MODE_CLOSED:
807 PyErr_SetString(PyExc_ValueError,
808 "I/O operation on closed file");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000809 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000810
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000811 default:
812 PyErr_SetString(PyExc_IOError,
813 "file is not ready for writing");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000814 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000815 }
816
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000817 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000818
819 Py_BEGIN_ALLOW_THREADS
820 BZ2_bzWrite (&bzerror, self->fp, buf, len);
821 self->pos += len;
822 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000823
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824 if (bzerror != BZ_OK) {
825 Util_CatchBZ2Error(bzerror);
826 goto cleanup;
827 }
Tim Peterse3228092002-11-09 04:21:44 +0000828
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000829 Py_INCREF(Py_None);
830 ret = Py_None;
831
832cleanup:
833 RELEASE_LOCK(self);
834 return ret;
835}
836
837PyDoc_STRVAR(BZ2File_writelines__doc__,
838"writelines(sequence_of_strings) -> None\n\
839\n\
840Write the sequence of strings to the file. Note that newlines are not\n\
841added. The sequence can be any iterable object producing strings. This is\n\
842equivalent to calling write() for each string.\n\
843");
844
845/* This is a hacked version of Python's fileobject.c:file_writelines(). */
846static PyObject *
847BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
848{
849#define CHUNKSIZE 1000
850 PyObject *list = NULL;
851 PyObject *iter = NULL;
852 PyObject *ret = NULL;
853 PyObject *line;
854 int i, j, index, len, islist;
855 int bzerror;
856
857 ACQUIRE_LOCK(self);
Georg Brandl3335a7a2006-08-14 21:42:55 +0000858 switch (self->mode) {
859 case MODE_WRITE:
860 break;
861
862 case MODE_CLOSED:
863 PyErr_SetString(PyExc_ValueError,
864 "I/O operation on closed file");
865 goto error;
866
867 default:
868 PyErr_SetString(PyExc_IOError,
869 "file is not ready for writing");
870 goto error;
871 }
872
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000873 islist = PyList_Check(seq);
874 if (!islist) {
875 iter = PyObject_GetIter(seq);
876 if (iter == NULL) {
877 PyErr_SetString(PyExc_TypeError,
878 "writelines() requires an iterable argument");
879 goto error;
880 }
881 list = PyList_New(CHUNKSIZE);
882 if (list == NULL)
883 goto error;
884 }
885
886 /* Strategy: slurp CHUNKSIZE lines into a private list,
887 checking that they are all strings, then write that list
888 without holding the interpreter lock, then come back for more. */
889 for (index = 0; ; index += CHUNKSIZE) {
890 if (islist) {
891 Py_XDECREF(list);
892 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
893 if (list == NULL)
894 goto error;
895 j = PyList_GET_SIZE(list);
896 }
897 else {
898 for (j = 0; j < CHUNKSIZE; j++) {
899 line = PyIter_Next(iter);
900 if (line == NULL) {
901 if (PyErr_Occurred())
902 goto error;
903 break;
904 }
905 PyList_SetItem(list, j, line);
906 }
907 }
908 if (j == 0)
909 break;
910
911 /* Check that all entries are indeed strings. If not,
912 apply the same rules as for file.write() and
913 convert the rets to strings. This is slow, but
914 seems to be the only way since all conversion APIs
915 could potentially execute Python code. */
916 for (i = 0; i < j; i++) {
917 PyObject *v = PyList_GET_ITEM(list, i);
Christian Heimes593daf52008-05-26 12:51:38 +0000918 if (!PyBytes_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000919 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000920 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000921 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
922 PyErr_SetString(PyExc_TypeError,
923 "writelines() "
924 "argument must be "
925 "a sequence of "
926 "strings");
927 goto error;
928 }
Christian Heimes593daf52008-05-26 12:51:38 +0000929 line = PyBytes_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000930 len);
931 if (line == NULL)
932 goto error;
933 Py_DECREF(v);
934 PyList_SET_ITEM(list, i, line);
935 }
936 }
937
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000938 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000939
940 /* Since we are releasing the global lock, the
941 following code may *not* execute Python code. */
942 Py_BEGIN_ALLOW_THREADS
943 for (i = 0; i < j; i++) {
944 line = PyList_GET_ITEM(list, i);
Christian Heimes593daf52008-05-26 12:51:38 +0000945 len = PyBytes_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000946 BZ2_bzWrite (&bzerror, self->fp,
Christian Heimes593daf52008-05-26 12:51:38 +0000947 PyBytes_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000948 if (bzerror != BZ_OK) {
949 Py_BLOCK_THREADS
950 Util_CatchBZ2Error(bzerror);
951 goto error;
952 }
953 }
954 Py_END_ALLOW_THREADS
955
956 if (j < CHUNKSIZE)
957 break;
958 }
959
960 Py_INCREF(Py_None);
961 ret = Py_None;
962
963 error:
964 RELEASE_LOCK(self);
965 Py_XDECREF(list);
966 Py_XDECREF(iter);
967 return ret;
968#undef CHUNKSIZE
969}
970
971PyDoc_STRVAR(BZ2File_seek__doc__,
972"seek(offset [, whence]) -> None\n\
973\n\
974Move to new file position. Argument offset is a byte count. Optional\n\
975argument whence defaults to 0 (offset from start of file, offset\n\
976should be >= 0); other values are 1 (move relative to current position,\n\
977positive or negative), and 2 (move relative to end of file, usually\n\
978negative, although many platforms allow seeking beyond the end of a file).\n\
979\n\
980Note that seeking of bz2 files is emulated, and depending on the parameters\n\
981the operation may be extremely slow.\n\
982");
983
984static PyObject *
985BZ2File_seek(BZ2FileObject *self, PyObject *args)
986{
987 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000988 PyObject *offobj;
989 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000990 char small_buffer[SMALLCHUNK];
991 char *buffer = small_buffer;
992 size_t buffersize = SMALLCHUNK;
Andrew M. Kuchling44b054b2006-12-18 19:22:24 +0000993 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000994 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000995 int chunksize;
996 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000997 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000998
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000999 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1000 return NULL;
1001#if !defined(HAVE_LARGEFILE_SUPPORT)
1002 offset = PyInt_AsLong(offobj);
1003#else
1004 offset = PyLong_Check(offobj) ?
1005 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1006#endif
1007 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001008 return NULL;
1009
1010 ACQUIRE_LOCK(self);
1011 Util_DropReadAhead(self);
1012 switch (self->mode) {
1013 case MODE_READ:
1014 case MODE_READ_EOF:
1015 break;
Tim Peterse3228092002-11-09 04:21:44 +00001016
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001017 case MODE_CLOSED:
1018 PyErr_SetString(PyExc_ValueError,
1019 "I/O operation on closed file");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001020 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001021
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001022 default:
1023 PyErr_SetString(PyExc_IOError,
1024 "seek works only while reading");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001025 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001026 }
1027
Georg Brandl47fab922006-02-18 21:57:25 +00001028 if (where == 2) {
1029 if (self->size == -1) {
1030 assert(self->mode != MODE_READ_EOF);
1031 for (;;) {
1032 Py_BEGIN_ALLOW_THREADS
1033 chunksize = Util_UnivNewlineRead(
1034 &bzerror, self->fp,
1035 buffer, buffersize,
1036 self);
1037 self->pos += chunksize;
1038 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001039
Georg Brandl47fab922006-02-18 21:57:25 +00001040 bytesread += chunksize;
1041 if (bzerror == BZ_STREAM_END) {
1042 break;
1043 } else if (bzerror != BZ_OK) {
1044 Util_CatchBZ2Error(bzerror);
1045 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001046 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001047 }
Georg Brandl47fab922006-02-18 21:57:25 +00001048 self->mode = MODE_READ_EOF;
1049 self->size = self->pos;
1050 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001051 }
Georg Brandl47fab922006-02-18 21:57:25 +00001052 offset = self->size + offset;
1053 } else if (where == 1) {
1054 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001055 }
1056
Georg Brandl47fab922006-02-18 21:57:25 +00001057 /* Before getting here, offset must be the absolute position the file
1058 * pointer should be set to. */
1059
1060 if (offset >= self->pos) {
1061 /* we can move forward */
1062 offset -= self->pos;
1063 } else {
1064 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001065 BZ2_bzReadClose(&bzerror, self->fp);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001066 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001067 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001068 self->fp = NULL;
1069 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 if (bzerror != BZ_OK) {
1071 Util_CatchBZ2Error(bzerror);
1072 goto cleanup;
1073 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001074 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001075 if (!ret)
1076 goto cleanup;
1077 Py_DECREF(ret);
1078 ret = NULL;
1079 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001080 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001081 0, 0, NULL, 0);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001082 if (self->fp)
Gregory P. Smith73bee442008-04-12 20:37:48 +00001083 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001084 if (bzerror != BZ_OK) {
1085 Util_CatchBZ2Error(bzerror);
1086 goto cleanup;
1087 }
1088 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001089 }
1090
Georg Brandl47fab922006-02-18 21:57:25 +00001091 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001092 goto exit;
1093
1094 /* Before getting here, offset must be set to the number of bytes
1095 * to walk forward. */
1096 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001097 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001098 readsize = buffersize;
1099 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001100 /* offset might be wider that readsize, but the result
1101 * of the subtraction is bound by buffersize (see the
1102 * condition above). buffersize is 8192. */
1103 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001104 Py_BEGIN_ALLOW_THREADS
1105 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1106 buffer, readsize, self);
1107 self->pos += chunksize;
1108 Py_END_ALLOW_THREADS
1109 bytesread += chunksize;
1110 if (bzerror == BZ_STREAM_END) {
1111 self->size = self->pos;
1112 self->mode = MODE_READ_EOF;
1113 break;
1114 } else if (bzerror != BZ_OK) {
1115 Util_CatchBZ2Error(bzerror);
1116 goto cleanup;
1117 }
1118 if (bytesread == offset)
1119 break;
1120 }
1121
1122exit:
1123 Py_INCREF(Py_None);
1124 ret = Py_None;
1125
1126cleanup:
1127 RELEASE_LOCK(self);
1128 return ret;
1129}
1130
1131PyDoc_STRVAR(BZ2File_tell__doc__,
1132"tell() -> int\n\
1133\n\
1134Return the current file position, an integer (may be a long integer).\n\
1135");
1136
1137static PyObject *
1138BZ2File_tell(BZ2FileObject *self, PyObject *args)
1139{
1140 PyObject *ret = NULL;
1141
1142 if (self->mode == MODE_CLOSED) {
1143 PyErr_SetString(PyExc_ValueError,
1144 "I/O operation on closed file");
1145 goto cleanup;
1146 }
1147
Georg Brandla8bcecc2005-09-03 07:49:53 +00001148#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001149 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001150#else
1151 ret = PyLong_FromLongLong(self->pos);
1152#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001153
1154cleanup:
1155 return ret;
1156}
1157
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001158PyDoc_STRVAR(BZ2File_close__doc__,
1159"close() -> None or (perhaps) an integer\n\
1160\n\
1161Close the file. Sets data attribute .closed to true. A closed file\n\
1162cannot be used for further I/O operations. close() may be called more\n\
1163than once without error.\n\
1164");
1165
1166static PyObject *
1167BZ2File_close(BZ2FileObject *self)
1168{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001169 PyObject *ret = NULL;
1170 int bzerror = BZ_OK;
1171
1172 ACQUIRE_LOCK(self);
1173 switch (self->mode) {
1174 case MODE_READ:
1175 case MODE_READ_EOF:
1176 BZ2_bzReadClose(&bzerror, self->fp);
1177 break;
1178 case MODE_WRITE:
1179 BZ2_bzWriteClose(&bzerror, self->fp,
1180 0, NULL, NULL);
1181 break;
1182 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001183 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001184 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001185 self->fp = NULL;
1186 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001187 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001188 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001189 if (bzerror != BZ_OK) {
1190 Util_CatchBZ2Error(bzerror);
1191 Py_XDECREF(ret);
1192 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001193 }
1194
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001195 RELEASE_LOCK(self);
1196 return ret;
1197}
1198
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001199static PyObject *BZ2File_getiter(BZ2FileObject *self);
1200
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001201static PyMethodDef BZ2File_methods[] = {
1202 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1203 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1204 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001205 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001206 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1207 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1208 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1209 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001210 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1211 {NULL, NULL} /* sentinel */
1212};
1213
1214
1215/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001216/* Getters and setters of BZ2File. */
1217
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001218/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1219static PyObject *
1220BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1221{
1222 switch (self->f_newlinetypes) {
1223 case NEWLINE_UNKNOWN:
1224 Py_INCREF(Py_None);
1225 return Py_None;
1226 case NEWLINE_CR:
Christian Heimes593daf52008-05-26 12:51:38 +00001227 return PyBytes_FromString("\r");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001228 case NEWLINE_LF:
Christian Heimes593daf52008-05-26 12:51:38 +00001229 return PyBytes_FromString("\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001230 case NEWLINE_CR|NEWLINE_LF:
1231 return Py_BuildValue("(ss)", "\r", "\n");
1232 case NEWLINE_CRLF:
Christian Heimes593daf52008-05-26 12:51:38 +00001233 return PyBytes_FromString("\r\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001234 case NEWLINE_CR|NEWLINE_CRLF:
1235 return Py_BuildValue("(ss)", "\r", "\r\n");
1236 case NEWLINE_LF|NEWLINE_CRLF:
1237 return Py_BuildValue("(ss)", "\n", "\r\n");
1238 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1239 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1240 default:
1241 PyErr_Format(PyExc_SystemError,
1242 "Unknown newlines value 0x%x\n",
1243 self->f_newlinetypes);
1244 return NULL;
1245 }
1246}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001247
1248static PyObject *
1249BZ2File_get_closed(BZ2FileObject *self, void *closure)
1250{
1251 return PyInt_FromLong(self->mode == MODE_CLOSED);
1252}
1253
1254static PyObject *
1255BZ2File_get_mode(BZ2FileObject *self, void *closure)
1256{
1257 return PyObject_GetAttrString(self->file, "mode");
1258}
1259
1260static PyObject *
1261BZ2File_get_name(BZ2FileObject *self, void *closure)
1262{
1263 return PyObject_GetAttrString(self->file, "name");
1264}
1265
1266static PyGetSetDef BZ2File_getset[] = {
1267 {"closed", (getter)BZ2File_get_closed, NULL,
1268 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001269 {"newlines", (getter)BZ2File_get_newlines, NULL,
1270 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001271 {"mode", (getter)BZ2File_get_mode, NULL,
1272 "file mode ('r', 'w', or 'U')"},
1273 {"name", (getter)BZ2File_get_name, NULL,
1274 "file name"},
1275 {NULL} /* Sentinel */
1276};
1277
1278
1279/* ===================================================================== */
1280/* Members of BZ2File_Type. */
1281
1282#undef OFF
1283#define OFF(x) offsetof(BZ2FileObject, x)
1284
1285static PyMemberDef BZ2File_members[] = {
1286 {"softspace", T_INT, OFF(f_softspace), 0,
1287 "flag indicating that a space needs to be printed; used by print"},
1288 {NULL} /* Sentinel */
1289};
1290
1291/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001292/* Slot definitions for BZ2File_Type. */
1293
1294static int
1295BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1296{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001297 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001298 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001299 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001300 char *mode = "r";
1301 int buffering = -1;
1302 int compresslevel = 9;
1303 int bzerror;
1304 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001305
1306 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001307
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001308 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1309 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001310 &compresslevel))
1311 return -1;
1312
1313 if (compresslevel < 1 || compresslevel > 9) {
1314 PyErr_SetString(PyExc_ValueError,
1315 "compresslevel must be between 1 and 9");
1316 return -1;
1317 }
1318
1319 for (;;) {
1320 int error = 0;
1321 switch (*mode) {
1322 case 'r':
1323 case 'w':
1324 if (mode_char)
1325 error = 1;
1326 mode_char = *mode;
1327 break;
1328
1329 case 'b':
1330 break;
1331
1332 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001333#ifdef __VMS
1334 self->f_univ_newline = 0;
1335#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001336 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001337#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001338 break;
1339
1340 default:
1341 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001342 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001343 }
1344 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001345 PyErr_Format(PyExc_ValueError,
1346 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001347 return -1;
1348 }
1349 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001350 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001351 break;
1352 }
1353
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001354 if (mode_char == 0) {
1355 mode_char = 'r';
1356 }
1357
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001358 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001359
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001360 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1361 name, mode, buffering);
1362 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001363 return -1;
1364
1365 /* From now on, we have stuff to dealloc, so jump to error label
1366 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001367
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001368#ifdef WITH_THREAD
1369 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001370 if (!self->lock) {
1371 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001372 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001373 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001374#endif
1375
1376 if (mode_char == 'r')
1377 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001378 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001379 0, 0, NULL, 0);
1380 else
1381 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001382 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001383 compresslevel, 0, 0);
1384
1385 if (bzerror != BZ_OK) {
1386 Util_CatchBZ2Error(bzerror);
1387 goto error;
1388 }
Gregory P. Smith73bee442008-04-12 20:37:48 +00001389 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001390
1391 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1392
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001393 return 0;
1394
1395error:
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001396 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001397#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001398 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001399 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001400 self->lock = NULL;
1401 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001402#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001403 return -1;
1404}
1405
1406static void
1407BZ2File_dealloc(BZ2FileObject *self)
1408{
1409 int bzerror;
1410#ifdef WITH_THREAD
1411 if (self->lock)
1412 PyThread_free_lock(self->lock);
1413#endif
1414 switch (self->mode) {
1415 case MODE_READ:
1416 case MODE_READ_EOF:
1417 BZ2_bzReadClose(&bzerror, self->fp);
1418 break;
1419 case MODE_WRITE:
1420 BZ2_bzWriteClose(&bzerror, self->fp,
1421 0, NULL, NULL);
1422 break;
1423 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001424 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001425 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001426 self->fp = NULL;
1427 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001428 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001429 Py_XDECREF(self->file);
Christian Heimese93237d2007-12-19 02:37:44 +00001430 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001431}
1432
1433/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1434static PyObject *
1435BZ2File_getiter(BZ2FileObject *self)
1436{
1437 if (self->mode == MODE_CLOSED) {
1438 PyErr_SetString(PyExc_ValueError,
1439 "I/O operation on closed file");
1440 return NULL;
1441 }
1442 Py_INCREF((PyObject*)self);
1443 return (PyObject *)self;
1444}
1445
1446/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1447#define READAHEAD_BUFSIZE 8192
1448static PyObject *
1449BZ2File_iternext(BZ2FileObject *self)
1450{
Christian Heimes593daf52008-05-26 12:51:38 +00001451 PyBytesObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001452 ACQUIRE_LOCK(self);
1453 if (self->mode == MODE_CLOSED) {
1454 PyErr_SetString(PyExc_ValueError,
1455 "I/O operation on closed file");
1456 return NULL;
1457 }
1458 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1459 RELEASE_LOCK(self);
Christian Heimes593daf52008-05-26 12:51:38 +00001460 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001461 Py_XDECREF(ret);
1462 return NULL;
1463 }
1464 return (PyObject *)ret;
1465}
1466
1467/* ===================================================================== */
1468/* BZ2File_Type definition. */
1469
1470PyDoc_VAR(BZ2File__doc__) =
1471PyDoc_STR(
1472"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1473\n\
1474Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1475writing. When opened for writing, the file will be created if it doesn't\n\
1476exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1477unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1478is given, must be a number between 1 and 9.\n\
1479")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001480PyDoc_STR(
1481"\n\
1482Add a 'U' to mode to open the file for input with universal newline\n\
1483support. Any line ending in the input file will be seen as a '\\n' in\n\
1484Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1485for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1486'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1487newlines are available only when reading.\n\
1488")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001489;
1490
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001491static PyTypeObject BZ2File_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001492 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001493 "bz2.BZ2File", /*tp_name*/
1494 sizeof(BZ2FileObject), /*tp_basicsize*/
1495 0, /*tp_itemsize*/
1496 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1497 0, /*tp_print*/
1498 0, /*tp_getattr*/
1499 0, /*tp_setattr*/
1500 0, /*tp_compare*/
1501 0, /*tp_repr*/
1502 0, /*tp_as_number*/
1503 0, /*tp_as_sequence*/
1504 0, /*tp_as_mapping*/
1505 0, /*tp_hash*/
1506 0, /*tp_call*/
1507 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001508 PyObject_GenericGetAttr,/*tp_getattro*/
1509 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001510 0, /*tp_as_buffer*/
1511 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1512 BZ2File__doc__, /*tp_doc*/
1513 0, /*tp_traverse*/
1514 0, /*tp_clear*/
1515 0, /*tp_richcompare*/
1516 0, /*tp_weaklistoffset*/
1517 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1518 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1519 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001520 BZ2File_members, /*tp_members*/
1521 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001522 0, /*tp_base*/
1523 0, /*tp_dict*/
1524 0, /*tp_descr_get*/
1525 0, /*tp_descr_set*/
1526 0, /*tp_dictoffset*/
1527 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001528 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001529 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001530 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001531 0, /*tp_is_gc*/
1532};
1533
1534
1535/* ===================================================================== */
1536/* Methods of BZ2Comp. */
1537
1538PyDoc_STRVAR(BZ2Comp_compress__doc__,
1539"compress(data) -> string\n\
1540\n\
1541Provide more data to the compressor object. It will return chunks of\n\
1542compressed data whenever possible. When you've finished providing data\n\
1543to compress, call the flush() method to finish the compression process,\n\
1544and return what is left in the internal buffers.\n\
1545");
1546
1547static PyObject *
1548BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1549{
1550 char *data;
1551 int datasize;
1552 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001553 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001554 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001555 bz_stream *bzs = &self->bzs;
1556 int bzerror;
1557
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001558 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001559 return NULL;
1560
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001561 if (datasize == 0)
Christian Heimes593daf52008-05-26 12:51:38 +00001562 return PyBytes_FromString("");
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001563
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001564 ACQUIRE_LOCK(self);
1565 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001566 PyErr_SetString(PyExc_ValueError,
1567 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001568 goto error;
1569 }
1570
Christian Heimes593daf52008-05-26 12:51:38 +00001571 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001572 if (!ret)
1573 goto error;
1574
1575 bzs->next_in = data;
1576 bzs->avail_in = datasize;
1577 bzs->next_out = BUF(ret);
1578 bzs->avail_out = bufsize;
1579
1580 totalout = BZS_TOTAL_OUT(bzs);
1581
1582 for (;;) {
1583 Py_BEGIN_ALLOW_THREADS
1584 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1585 Py_END_ALLOW_THREADS
1586 if (bzerror != BZ_RUN_OK) {
1587 Util_CatchBZ2Error(bzerror);
1588 goto error;
1589 }
Georg Brandla47337f2007-03-13 12:34:25 +00001590 if (bzs->avail_in == 0)
1591 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001592 if (bzs->avail_out == 0) {
1593 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes593daf52008-05-26 12:51:38 +00001594 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001595 BZ2_bzCompressEnd(bzs);
1596 goto error;
1597 }
1598 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1599 - totalout);
1600 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001601 }
1602 }
1603
Christian Heimes593daf52008-05-26 12:51:38 +00001604 _PyBytes_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001605
1606 RELEASE_LOCK(self);
1607 return ret;
1608
1609error:
1610 RELEASE_LOCK(self);
1611 Py_XDECREF(ret);
1612 return NULL;
1613}
1614
1615PyDoc_STRVAR(BZ2Comp_flush__doc__,
1616"flush() -> string\n\
1617\n\
1618Finish the compression process and return what is left in internal buffers.\n\
1619You must not use the compressor object after calling this method.\n\
1620");
1621
1622static PyObject *
1623BZ2Comp_flush(BZ2CompObject *self)
1624{
1625 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001626 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001627 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001628 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001629 int bzerror;
1630
1631 ACQUIRE_LOCK(self);
1632 if (!self->running) {
1633 PyErr_SetString(PyExc_ValueError, "object was already "
1634 "flushed");
1635 goto error;
1636 }
1637 self->running = 0;
1638
Christian Heimes593daf52008-05-26 12:51:38 +00001639 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001640 if (!ret)
1641 goto error;
1642
1643 bzs->next_out = BUF(ret);
1644 bzs->avail_out = bufsize;
1645
1646 totalout = BZS_TOTAL_OUT(bzs);
1647
1648 for (;;) {
1649 Py_BEGIN_ALLOW_THREADS
1650 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1651 Py_END_ALLOW_THREADS
1652 if (bzerror == BZ_STREAM_END) {
1653 break;
1654 } else if (bzerror != BZ_FINISH_OK) {
1655 Util_CatchBZ2Error(bzerror);
1656 goto error;
1657 }
1658 if (bzs->avail_out == 0) {
1659 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes593daf52008-05-26 12:51:38 +00001660 if (_PyBytes_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001661 goto error;
1662 bzs->next_out = BUF(ret);
1663 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1664 - totalout);
1665 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1666 }
1667 }
1668
1669 if (bzs->avail_out != 0)
Christian Heimes593daf52008-05-26 12:51:38 +00001670 _PyBytes_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001671
1672 RELEASE_LOCK(self);
1673 return ret;
1674
1675error:
1676 RELEASE_LOCK(self);
1677 Py_XDECREF(ret);
1678 return NULL;
1679}
1680
1681static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001682 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1683 BZ2Comp_compress__doc__},
1684 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1685 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001686 {NULL, NULL} /* sentinel */
1687};
1688
1689
1690/* ===================================================================== */
1691/* Slot definitions for BZ2Comp_Type. */
1692
1693static int
1694BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1695{
1696 int compresslevel = 9;
1697 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001698 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001699
1700 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1701 kwlist, &compresslevel))
1702 return -1;
1703
1704 if (compresslevel < 1 || compresslevel > 9) {
1705 PyErr_SetString(PyExc_ValueError,
1706 "compresslevel must be between 1 and 9");
1707 goto error;
1708 }
1709
1710#ifdef WITH_THREAD
1711 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001712 if (!self->lock) {
1713 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001714 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001715 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001716#endif
1717
1718 memset(&self->bzs, 0, sizeof(bz_stream));
1719 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1720 if (bzerror != BZ_OK) {
1721 Util_CatchBZ2Error(bzerror);
1722 goto error;
1723 }
1724
1725 self->running = 1;
1726
1727 return 0;
1728error:
1729#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001730 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001731 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001732 self->lock = NULL;
1733 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001734#endif
1735 return -1;
1736}
1737
1738static void
1739BZ2Comp_dealloc(BZ2CompObject *self)
1740{
1741#ifdef WITH_THREAD
1742 if (self->lock)
1743 PyThread_free_lock(self->lock);
1744#endif
1745 BZ2_bzCompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001746 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001747}
1748
1749
1750/* ===================================================================== */
1751/* BZ2Comp_Type definition. */
1752
1753PyDoc_STRVAR(BZ2Comp__doc__,
1754"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1755\n\
1756Create a new compressor object. This object may be used to compress\n\
1757data sequentially. If you want to compress data in one shot, use the\n\
1758compress() function instead. The compresslevel parameter, if given,\n\
1759must be a number between 1 and 9.\n\
1760");
1761
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001762static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001763 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001764 "bz2.BZ2Compressor", /*tp_name*/
1765 sizeof(BZ2CompObject), /*tp_basicsize*/
1766 0, /*tp_itemsize*/
1767 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1768 0, /*tp_print*/
1769 0, /*tp_getattr*/
1770 0, /*tp_setattr*/
1771 0, /*tp_compare*/
1772 0, /*tp_repr*/
1773 0, /*tp_as_number*/
1774 0, /*tp_as_sequence*/
1775 0, /*tp_as_mapping*/
1776 0, /*tp_hash*/
1777 0, /*tp_call*/
1778 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001779 PyObject_GenericGetAttr,/*tp_getattro*/
1780 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001781 0, /*tp_as_buffer*/
1782 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1783 BZ2Comp__doc__, /*tp_doc*/
1784 0, /*tp_traverse*/
1785 0, /*tp_clear*/
1786 0, /*tp_richcompare*/
1787 0, /*tp_weaklistoffset*/
1788 0, /*tp_iter*/
1789 0, /*tp_iternext*/
1790 BZ2Comp_methods, /*tp_methods*/
1791 0, /*tp_members*/
1792 0, /*tp_getset*/
1793 0, /*tp_base*/
1794 0, /*tp_dict*/
1795 0, /*tp_descr_get*/
1796 0, /*tp_descr_set*/
1797 0, /*tp_dictoffset*/
1798 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001799 PyType_GenericAlloc, /*tp_alloc*/
1800 PyType_GenericNew, /*tp_new*/
1801 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001802 0, /*tp_is_gc*/
1803};
1804
1805
1806/* ===================================================================== */
1807/* Members of BZ2Decomp. */
1808
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001809#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001810#define OFF(x) offsetof(BZ2DecompObject, x)
1811
1812static PyMemberDef BZ2Decomp_members[] = {
1813 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1814 {NULL} /* Sentinel */
1815};
1816
1817
1818/* ===================================================================== */
1819/* Methods of BZ2Decomp. */
1820
1821PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1822"decompress(data) -> string\n\
1823\n\
1824Provide more data to the decompressor object. It will return chunks\n\
1825of decompressed data whenever possible. If you try to decompress data\n\
1826after the end of stream is found, EOFError will be raised. If any data\n\
1827was found after the end of stream, it'll be ignored and saved in\n\
1828unused_data attribute.\n\
1829");
1830
1831static PyObject *
1832BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1833{
1834 char *data;
1835 int datasize;
1836 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001837 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001838 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001839 bz_stream *bzs = &self->bzs;
1840 int bzerror;
1841
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001842 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001843 return NULL;
1844
1845 ACQUIRE_LOCK(self);
1846 if (!self->running) {
1847 PyErr_SetString(PyExc_EOFError, "end of stream was "
1848 "already found");
1849 goto error;
1850 }
1851
Christian Heimes593daf52008-05-26 12:51:38 +00001852 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001853 if (!ret)
1854 goto error;
1855
1856 bzs->next_in = data;
1857 bzs->avail_in = datasize;
1858 bzs->next_out = BUF(ret);
1859 bzs->avail_out = bufsize;
1860
1861 totalout = BZS_TOTAL_OUT(bzs);
1862
1863 for (;;) {
1864 Py_BEGIN_ALLOW_THREADS
1865 bzerror = BZ2_bzDecompress(bzs);
1866 Py_END_ALLOW_THREADS
1867 if (bzerror == BZ_STREAM_END) {
1868 if (bzs->avail_in != 0) {
1869 Py_DECREF(self->unused_data);
1870 self->unused_data =
Christian Heimes593daf52008-05-26 12:51:38 +00001871 PyBytes_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001872 bzs->avail_in);
1873 }
1874 self->running = 0;
1875 break;
1876 }
1877 if (bzerror != BZ_OK) {
1878 Util_CatchBZ2Error(bzerror);
1879 goto error;
1880 }
Georg Brandla47337f2007-03-13 12:34:25 +00001881 if (bzs->avail_in == 0)
1882 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001883 if (bzs->avail_out == 0) {
1884 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes593daf52008-05-26 12:51:38 +00001885 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001886 BZ2_bzDecompressEnd(bzs);
1887 goto error;
1888 }
1889 bzs->next_out = BUF(ret);
1890 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1891 - totalout);
1892 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001893 }
1894 }
1895
1896 if (bzs->avail_out != 0)
Christian Heimes593daf52008-05-26 12:51:38 +00001897 _PyBytes_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001898
1899 RELEASE_LOCK(self);
1900 return ret;
1901
1902error:
1903 RELEASE_LOCK(self);
1904 Py_XDECREF(ret);
1905 return NULL;
1906}
1907
1908static PyMethodDef BZ2Decomp_methods[] = {
1909 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1910 {NULL, NULL} /* sentinel */
1911};
1912
1913
1914/* ===================================================================== */
1915/* Slot definitions for BZ2Decomp_Type. */
1916
1917static int
1918BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1919{
1920 int bzerror;
1921
1922 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1923 return -1;
1924
1925#ifdef WITH_THREAD
1926 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001927 if (!self->lock) {
1928 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001929 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001930 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001931#endif
1932
Christian Heimes593daf52008-05-26 12:51:38 +00001933 self->unused_data = PyBytes_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001934 if (!self->unused_data)
1935 goto error;
1936
1937 memset(&self->bzs, 0, sizeof(bz_stream));
1938 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1939 if (bzerror != BZ_OK) {
1940 Util_CatchBZ2Error(bzerror);
1941 goto error;
1942 }
1943
1944 self->running = 1;
1945
1946 return 0;
1947
1948error:
1949#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001950 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001951 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001952 self->lock = NULL;
1953 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001954#endif
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001955 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001956 return -1;
1957}
1958
1959static void
1960BZ2Decomp_dealloc(BZ2DecompObject *self)
1961{
1962#ifdef WITH_THREAD
1963 if (self->lock)
1964 PyThread_free_lock(self->lock);
1965#endif
1966 Py_XDECREF(self->unused_data);
1967 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001968 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001969}
1970
1971
1972/* ===================================================================== */
1973/* BZ2Decomp_Type definition. */
1974
1975PyDoc_STRVAR(BZ2Decomp__doc__,
1976"BZ2Decompressor() -> decompressor object\n\
1977\n\
1978Create a new decompressor object. This object may be used to decompress\n\
1979data sequentially. If you want to decompress data in one shot, use the\n\
1980decompress() function instead.\n\
1981");
1982
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001983static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001984 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001985 "bz2.BZ2Decompressor", /*tp_name*/
1986 sizeof(BZ2DecompObject), /*tp_basicsize*/
1987 0, /*tp_itemsize*/
1988 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1989 0, /*tp_print*/
1990 0, /*tp_getattr*/
1991 0, /*tp_setattr*/
1992 0, /*tp_compare*/
1993 0, /*tp_repr*/
1994 0, /*tp_as_number*/
1995 0, /*tp_as_sequence*/
1996 0, /*tp_as_mapping*/
1997 0, /*tp_hash*/
1998 0, /*tp_call*/
1999 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002000 PyObject_GenericGetAttr,/*tp_getattro*/
2001 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002002 0, /*tp_as_buffer*/
2003 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2004 BZ2Decomp__doc__, /*tp_doc*/
2005 0, /*tp_traverse*/
2006 0, /*tp_clear*/
2007 0, /*tp_richcompare*/
2008 0, /*tp_weaklistoffset*/
2009 0, /*tp_iter*/
2010 0, /*tp_iternext*/
2011 BZ2Decomp_methods, /*tp_methods*/
2012 BZ2Decomp_members, /*tp_members*/
2013 0, /*tp_getset*/
2014 0, /*tp_base*/
2015 0, /*tp_dict*/
2016 0, /*tp_descr_get*/
2017 0, /*tp_descr_set*/
2018 0, /*tp_dictoffset*/
2019 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002020 PyType_GenericAlloc, /*tp_alloc*/
2021 PyType_GenericNew, /*tp_new*/
2022 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002023 0, /*tp_is_gc*/
2024};
2025
2026
2027/* ===================================================================== */
2028/* Module functions. */
2029
2030PyDoc_STRVAR(bz2_compress__doc__,
2031"compress(data [, compresslevel=9]) -> string\n\
2032\n\
2033Compress data in one shot. If you want to compress data sequentially,\n\
2034use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2035given, must be a number between 1 and 9.\n\
2036");
2037
2038static PyObject *
2039bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2040{
2041 int compresslevel=9;
2042 char *data;
2043 int datasize;
2044 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002045 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002046 bz_stream _bzs;
2047 bz_stream *bzs = &_bzs;
2048 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002049 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002050
2051 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2052 kwlist, &data, &datasize,
2053 &compresslevel))
2054 return NULL;
2055
2056 if (compresslevel < 1 || compresslevel > 9) {
2057 PyErr_SetString(PyExc_ValueError,
2058 "compresslevel must be between 1 and 9");
2059 return NULL;
2060 }
2061
2062 /* Conforming to bz2 manual, this is large enough to fit compressed
2063 * data in one shot. We will check it later anyway. */
2064 bufsize = datasize + (datasize/100+1) + 600;
2065
Christian Heimes593daf52008-05-26 12:51:38 +00002066 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002067 if (!ret)
2068 return NULL;
2069
2070 memset(bzs, 0, sizeof(bz_stream));
2071
2072 bzs->next_in = data;
2073 bzs->avail_in = datasize;
2074 bzs->next_out = BUF(ret);
2075 bzs->avail_out = bufsize;
2076
2077 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2078 if (bzerror != BZ_OK) {
2079 Util_CatchBZ2Error(bzerror);
2080 Py_DECREF(ret);
2081 return NULL;
2082 }
Tim Peterse3228092002-11-09 04:21:44 +00002083
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002084 for (;;) {
2085 Py_BEGIN_ALLOW_THREADS
2086 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2087 Py_END_ALLOW_THREADS
2088 if (bzerror == BZ_STREAM_END) {
2089 break;
2090 } else if (bzerror != BZ_FINISH_OK) {
2091 BZ2_bzCompressEnd(bzs);
2092 Util_CatchBZ2Error(bzerror);
2093 Py_DECREF(ret);
2094 return NULL;
2095 }
2096 if (bzs->avail_out == 0) {
2097 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes593daf52008-05-26 12:51:38 +00002098 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002099 BZ2_bzCompressEnd(bzs);
2100 Py_DECREF(ret);
2101 return NULL;
2102 }
2103 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2104 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2105 }
2106 }
2107
2108 if (bzs->avail_out != 0)
Christian Heimes593daf52008-05-26 12:51:38 +00002109 _PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002110 BZ2_bzCompressEnd(bzs);
2111
2112 return ret;
2113}
2114
2115PyDoc_STRVAR(bz2_decompress__doc__,
2116"decompress(data) -> decompressed data\n\
2117\n\
2118Decompress data in one shot. If you want to decompress data sequentially,\n\
2119use an instance of BZ2Decompressor instead.\n\
2120");
2121
2122static PyObject *
2123bz2_decompress(PyObject *self, PyObject *args)
2124{
2125 char *data;
2126 int datasize;
2127 int bufsize = SMALLCHUNK;
2128 PyObject *ret;
2129 bz_stream _bzs;
2130 bz_stream *bzs = &_bzs;
2131 int bzerror;
2132
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002133 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002134 return NULL;
2135
2136 if (datasize == 0)
Christian Heimes593daf52008-05-26 12:51:38 +00002137 return PyBytes_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002138
Christian Heimes593daf52008-05-26 12:51:38 +00002139 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002140 if (!ret)
2141 return NULL;
2142
2143 memset(bzs, 0, sizeof(bz_stream));
2144
2145 bzs->next_in = data;
2146 bzs->avail_in = datasize;
2147 bzs->next_out = BUF(ret);
2148 bzs->avail_out = bufsize;
2149
2150 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2151 if (bzerror != BZ_OK) {
2152 Util_CatchBZ2Error(bzerror);
2153 Py_DECREF(ret);
2154 return NULL;
2155 }
Tim Peterse3228092002-11-09 04:21:44 +00002156
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002157 for (;;) {
2158 Py_BEGIN_ALLOW_THREADS
2159 bzerror = BZ2_bzDecompress(bzs);
2160 Py_END_ALLOW_THREADS
2161 if (bzerror == BZ_STREAM_END) {
2162 break;
2163 } else if (bzerror != BZ_OK) {
2164 BZ2_bzDecompressEnd(bzs);
2165 Util_CatchBZ2Error(bzerror);
2166 Py_DECREF(ret);
2167 return NULL;
2168 }
Georg Brandla47337f2007-03-13 12:34:25 +00002169 if (bzs->avail_in == 0) {
2170 BZ2_bzDecompressEnd(bzs);
2171 PyErr_SetString(PyExc_ValueError,
2172 "couldn't find end of stream");
2173 Py_DECREF(ret);
2174 return NULL;
2175 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002176 if (bzs->avail_out == 0) {
2177 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes593daf52008-05-26 12:51:38 +00002178 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002179 BZ2_bzDecompressEnd(bzs);
2180 Py_DECREF(ret);
2181 return NULL;
2182 }
2183 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2184 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002185 }
2186 }
2187
2188 if (bzs->avail_out != 0)
Christian Heimes593daf52008-05-26 12:51:38 +00002189 _PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002190 BZ2_bzDecompressEnd(bzs);
2191
2192 return ret;
2193}
2194
2195static PyMethodDef bz2_methods[] = {
2196 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2197 bz2_compress__doc__},
2198 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2199 bz2_decompress__doc__},
2200 {NULL, NULL} /* sentinel */
2201};
2202
2203/* ===================================================================== */
2204/* Initialization function. */
2205
2206PyDoc_STRVAR(bz2__doc__,
2207"The python bz2 module provides a comprehensive interface for\n\
2208the bz2 compression library. It implements a complete file\n\
2209interface, one shot (de)compression functions, and types for\n\
2210sequential (de)compression.\n\
2211");
2212
Neal Norwitz21d896c2003-07-01 20:15:21 +00002213PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002214initbz2(void)
2215{
2216 PyObject *m;
2217
Christian Heimese93237d2007-12-19 02:37:44 +00002218 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2219 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2220 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002221
2222 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002223 if (m == NULL)
2224 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002225
Christian Heimes593daf52008-05-26 12:51:38 +00002226 PyModule_AddObject(m, "__author__", PyBytes_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002227
2228 Py_INCREF(&BZ2File_Type);
2229 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2230
2231 Py_INCREF(&BZ2Comp_Type);
2232 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2233
2234 Py_INCREF(&BZ2Decomp_Type);
2235 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2236}