blob: 39292197d8ed27914e5a08608f01689f834ba19a [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text.  Presumably exposed as the module-level
 * __author__ attribute by the module init code -- TODO confirm, the
 * init function is not visible in this part of the file. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/* Our very own off_t-like type, 64-bit if possible */
/* copied from Objects/fileobject.c */
/* Selection order:
 *   1. no large-file support configured -> plain off_t;
 *   2. off_t is at least 8 bytes        -> off_t;
 *   3. fpos_t is at least 8 bytes       -> fpos_t;
 *   4. otherwise the build is stopped with an #error.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character buffer of a Python string object.  The argument is
 * parenthesized so that expressions such as BUF(a ? b : c) cast the
 * whole expression rather than only its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)


#ifdef BZ_CONFIG_ERROR

/* Total number of output bytes recorded in a bz_stream.  The count is
 * split into two 32-bit halves; they are combined when a 64-bit integer
 * type is available, otherwise only the low half is reported.
 * 'bzs' must be a pointer; it is evaluated twice in the 64-bit forms. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* BZ_CONFIG_ERROR is not defined by the bzlib header in use: map the
 * BZ2_-prefixed names used throughout this file onto the unprefixed
 * ones (presumably an older libbzip2 API -- TODO confirm). */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */


/* Per-object locking.  Both macros expand to nothing when Python is
 * built without thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
/* Per-instance state of a BZ2File object. */
typedef struct {
    PyObject_HEAD
    PyObject *file;         /* presumably the underlying Python file
                               object -- TODO confirm against the
                               constructor, which is not visible here */

    char* f_buf;            /* Allocated readahead buffer */
    char* f_bufend;         /* Points after last occupied position */
    char* f_bufptr;         /* Current buffer position */

    int f_softspace;        /* Flag used by 'print' command */

    int f_univ_newline;     /* Handle any newline convention */
    int f_newlinetypes;     /* Types of newlines seen */
    int f_skipnextlf;       /* Skip next \n */

    BZFILE *fp;             /* libbzip2 handle passed to BZ2_bzRead and
                               BZ2_bzWrite */
    int mode;               /* One of the MODE_* constants */
    Py_off_t pos;           /* Running byte count, advanced by the read
                               and write helpers */
    Py_off_t size;          /* Set to pos when the end of the stream is
                               reached while reading */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Used by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2FileObject;
119
/* Per-instance state of a compressor object wrapping a bz_stream. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;          /* libbzip2 stream state */
    int running;            /* presumably non-zero while the stream is
                               usable -- TODO confirm against the
                               compressor methods */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Used by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2CompObject;
128
/* Per-instance state of a decompressor object wrapping a bz_stream. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;          /* libbzip2 stream state */
    int running;            /* presumably non-zero while the stream is
                               usable -- TODO confirm against the
                               decompressor methods */
    PyObject *unused_data;  /* presumably input left over after the end
                               of the compressed stream -- TODO confirm */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Used by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* Smallest growth step: the stdio buffer size, but never below 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Largest growth step; kept small when int is narrower than 4 bytes. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current size of a read buffer, return the size it should be
 * grown to:
 *   - up to SMALLCHUNK:             grow by SMALLCHUNK;
 *   - above that, up to BIGCHUNK:   double;
 *   - above BIGCHUNK:               grow by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
226
227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000238 int bytes_read;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000239 int newlinetypes = f->f_newlinetypes;
240 int skipnextlf = f->f_skipnextlf;
241 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000242
243 total_v_size = n > 0 ? n : 100;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000244 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000245 if (v == NULL)
246 return NULL;
247
248 buf = BUF(v);
249 end = buf + total_v_size;
250
251 for (;;) {
252 Py_BEGIN_ALLOW_THREADS
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000253 while (buf != end) {
254 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
256 if (bytes_read == 0) break;
257 if (univ_newline) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000261 /* Seeing a \n here with skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262 * saw a \r before.
263 */
264 newlinetypes |= NEWLINE_CRLF;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000265 if (bzerror != BZ_OK) break;
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
267 f->pos++;
268 if (bytes_read == 0) break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000269 } else {
270 newlinetypes |= NEWLINE_CR;
271 }
272 }
273 if (c == '\r') {
274 skipnextlf = 1;
275 c = '\n';
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000276 } else if (c == '\n')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000277 newlinetypes |= NEWLINE_LF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000278 }
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000279 *buf++ = c;
280 if (bzerror != BZ_OK || c == '\n') break;
281 }
282 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000285 f->f_newlinetypes = newlinetypes;
286 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000287 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000288 f->size = f->pos;
289 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000290 break;
291 } else if (bzerror != BZ_OK) {
292 Util_CatchBZ2Error(bzerror);
293 Py_DECREF(v);
294 return NULL;
295 }
296 if (c == '\n')
297 break;
298 /* Must be because buf == end */
299 if (n > 0)
300 break;
301 used_v_size = total_v_size;
302 increment = total_v_size >> 2; /* mild exponential growth */
303 total_v_size += increment;
304 if (total_v_size > INT_MAX) {
305 PyErr_SetString(PyExc_OverflowError,
306 "line is longer than a Python string can hold");
307 Py_DECREF(v);
308 return NULL;
309 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000310 if (_PyString_Resize(&v, total_v_size) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000311 return NULL;
312 buf = BUF(v) + used_v_size;
313 end = BUF(v) + total_v_size;
314 }
315
316 used_v_size = buf - BUF(v);
317 if (used_v_size != total_v_size)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000318 _PyString_Resize(&v, used_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000319 return v;
320}
321
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000322/* This is a hacked version of Python's
323 * fileobject.c:Py_UniversalNewlineFread(). */
324size_t
325Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000326 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327{
328 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000329 int newlinetypes, skipnextlf;
330
331 assert(buf != NULL);
332 assert(stream != NULL);
333
334 if (!f->f_univ_newline)
335 return BZ2_bzRead(bzerror, stream, buf, n);
336
337 newlinetypes = f->f_newlinetypes;
338 skipnextlf = f->f_skipnextlf;
339
340 /* Invariant: n is the number of bytes remaining to be filled
341 * in the buffer.
342 */
343 while (n) {
344 size_t nread;
345 int shortread;
346 char *src = dst;
347
348 nread = BZ2_bzRead(bzerror, stream, dst, n);
349 assert(nread <= n);
350 n -= nread; /* assuming 1 byte out for each in; will adjust */
351 shortread = n != 0; /* true iff EOF or error */
352 while (nread--) {
353 char c = *src++;
354 if (c == '\r') {
355 /* Save as LF and set flag to skip next LF. */
356 *dst++ = '\n';
357 skipnextlf = 1;
358 }
359 else if (skipnextlf && c == '\n') {
360 /* Skip LF, and remember we saw CR LF. */
361 skipnextlf = 0;
362 newlinetypes |= NEWLINE_CRLF;
363 ++n;
364 }
365 else {
366 /* Normal char to be stored in buffer. Also
367 * update the newlinetypes flag if either this
368 * is an LF or the previous char was a CR.
369 */
370 if (c == '\n')
371 newlinetypes |= NEWLINE_LF;
372 else if (skipnextlf)
373 newlinetypes |= NEWLINE_CR;
374 *dst++ = c;
375 skipnextlf = 0;
376 }
377 }
378 if (shortread) {
379 /* If this is EOF, update type flags. */
380 if (skipnextlf && *bzerror == BZ_STREAM_END)
381 newlinetypes |= NEWLINE_CR;
382 break;
383 }
384 }
385 f->f_newlinetypes = newlinetypes;
386 f->f_skipnextlf = skipnextlf;
387 return dst - buf;
388}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000389
390/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
391static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000392Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000393{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000394 if (f->f_buf != NULL) {
395 PyMem_Free(f->f_buf);
396 f->f_buf = NULL;
397 }
398}
399
400/* This is a hacked version of Python's fileobject.c:readahead(). */
401static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000402Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403{
404 int chunksize;
405 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000406
407 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000408 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409 return 0;
410 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000411 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000413 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000414 f->f_bufptr = f->f_buf;
415 f->f_bufend = f->f_buf;
416 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000417 }
418 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
Antoine Pitrou016b3662008-08-09 17:22:25 +0000419 PyErr_NoMemory();
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000420 return -1;
421 }
422 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
424 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000425 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000426 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 f->size = f->pos;
429 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 } else if (bzerror != BZ_OK) {
431 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000432 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000433 return -1;
434 }
435 f->f_bufptr = f->f_buf;
436 f->f_bufend = f->f_buf + chunksize;
437 return 0;
438}
439
440/* This is a hacked version of Python's
441 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000442static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000443Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000444{
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000445 PyStringObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000446 char *bufptr;
447 char *buf;
448 int len;
449
450 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000451 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000452 return NULL;
453
454 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000455 if (len == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000456 return (PyStringObject *)
457 PyString_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000458 bufptr = memchr(f->f_bufptr, '\n', len);
459 if (bufptr != NULL) {
460 bufptr++; /* Count the '\n' */
461 len = bufptr - f->f_bufptr;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000462 s = (PyStringObject *)
463 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000464 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000465 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000466 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 f->f_bufptr = bufptr;
468 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000469 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 } else {
471 bufptr = f->f_bufptr;
472 buf = f->f_buf;
473 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 s = Util_ReadAheadGetLineSkip(f, skip+len,
475 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000476 if (s == NULL) {
477 PyMem_Free(buf);
478 return NULL;
479 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000480 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 PyMem_Free(buf);
482 }
483 return s;
484}
485
486/* ===================================================================== */
487/* Methods of BZ2File. */
488
489PyDoc_STRVAR(BZ2File_read__doc__,
490"read([size]) -> string\n\
491\n\
492Read at most size uncompressed bytes, returned as a string. If the size\n\
493argument is negative or omitted, read until EOF is reached.\n\
494");
495
496/* This is a hacked version of Python's fileobject.c:file_read(). */
497static PyObject *
498BZ2File_read(BZ2FileObject *self, PyObject *args)
499{
500 long bytesrequested = -1;
501 size_t bytesread, buffersize, chunksize;
502 int bzerror;
503 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000504
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000505 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
506 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000507
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000508 ACQUIRE_LOCK(self);
509 switch (self->mode) {
510 case MODE_READ:
511 break;
512 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000513 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000514 goto cleanup;
515 case MODE_CLOSED:
516 PyErr_SetString(PyExc_ValueError,
517 "I/O operation on closed file");
518 goto cleanup;
519 default:
520 PyErr_SetString(PyExc_IOError,
521 "file is not ready for reading");
522 goto cleanup;
523 }
524
525 if (bytesrequested < 0)
526 buffersize = Util_NewBufferSize((size_t)0);
527 else
528 buffersize = bytesrequested;
529 if (buffersize > INT_MAX) {
530 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000531 "requested number of bytes is "
532 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000533 goto cleanup;
534 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000535 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000536 if (ret == NULL)
537 goto cleanup;
538 bytesread = 0;
539
540 for (;;) {
541 Py_BEGIN_ALLOW_THREADS
542 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
543 BUF(ret)+bytesread,
544 buffersize-bytesread,
545 self);
546 self->pos += chunksize;
547 Py_END_ALLOW_THREADS
548 bytesread += chunksize;
549 if (bzerror == BZ_STREAM_END) {
550 self->size = self->pos;
551 self->mode = MODE_READ_EOF;
552 break;
553 } else if (bzerror != BZ_OK) {
554 Util_CatchBZ2Error(bzerror);
555 Py_DECREF(ret);
556 ret = NULL;
557 goto cleanup;
558 }
559 if (bytesrequested < 0) {
560 buffersize = Util_NewBufferSize(buffersize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000561 if (_PyString_Resize(&ret, buffersize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000562 goto cleanup;
563 } else {
564 break;
565 }
566 }
567 if (bytesread != buffersize)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000568 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000569
570cleanup:
571 RELEASE_LOCK(self);
572 return ret;
573}
574
575PyDoc_STRVAR(BZ2File_readline__doc__,
576"readline([size]) -> string\n\
577\n\
578Return the next line from the file, as a string, retaining newline.\n\
579A non-negative size argument will limit the maximum number of bytes to\n\
580return (an incomplete line may be returned then). Return an empty\n\
581string at EOF.\n\
582");
583
584static PyObject *
585BZ2File_readline(BZ2FileObject *self, PyObject *args)
586{
587 PyObject *ret = NULL;
588 int sizehint = -1;
589
590 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
591 return NULL;
592
593 ACQUIRE_LOCK(self);
594 switch (self->mode) {
595 case MODE_READ:
596 break;
597 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000598 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000599 goto cleanup;
600 case MODE_CLOSED:
601 PyErr_SetString(PyExc_ValueError,
602 "I/O operation on closed file");
603 goto cleanup;
604 default:
605 PyErr_SetString(PyExc_IOError,
606 "file is not ready for reading");
607 goto cleanup;
608 }
609
610 if (sizehint == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000611 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000612 else
613 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
614
615cleanup:
616 RELEASE_LOCK(self);
617 return ret;
618}
619
620PyDoc_STRVAR(BZ2File_readlines__doc__,
621"readlines([size]) -> list\n\
622\n\
623Call readline() repeatedly and return a list of lines read.\n\
624The optional size argument, if given, is an approximate bound on the\n\
625total number of bytes in the lines returned.\n\
626");
627
628/* This is a hacked version of Python's fileobject.c:file_readlines(). */
629static PyObject *
630BZ2File_readlines(BZ2FileObject *self, PyObject *args)
631{
632 long sizehint = 0;
633 PyObject *list = NULL;
634 PyObject *line;
635 char small_buffer[SMALLCHUNK];
636 char *buffer = small_buffer;
637 size_t buffersize = SMALLCHUNK;
638 PyObject *big_buffer = NULL;
639 size_t nfilled = 0;
640 size_t nread;
641 size_t totalread = 0;
642 char *p, *q, *end;
643 int err;
644 int shortread = 0;
645 int bzerror;
646
647 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
648 return NULL;
649
650 ACQUIRE_LOCK(self);
651 switch (self->mode) {
652 case MODE_READ:
653 break;
654 case MODE_READ_EOF:
655 list = PyList_New(0);
656 goto cleanup;
657 case MODE_CLOSED:
658 PyErr_SetString(PyExc_ValueError,
659 "I/O operation on closed file");
660 goto cleanup;
661 default:
662 PyErr_SetString(PyExc_IOError,
663 "file is not ready for reading");
664 goto cleanup;
665 }
666
667 if ((list = PyList_New(0)) == NULL)
668 goto cleanup;
669
670 for (;;) {
671 Py_BEGIN_ALLOW_THREADS
672 nread = Util_UnivNewlineRead(&bzerror, self->fp,
673 buffer+nfilled,
674 buffersize-nfilled, self);
675 self->pos += nread;
676 Py_END_ALLOW_THREADS
677 if (bzerror == BZ_STREAM_END) {
678 self->size = self->pos;
679 self->mode = MODE_READ_EOF;
680 if (nread == 0) {
681 sizehint = 0;
682 break;
683 }
684 shortread = 1;
685 } else if (bzerror != BZ_OK) {
686 Util_CatchBZ2Error(bzerror);
687 error:
688 Py_DECREF(list);
689 list = NULL;
690 goto cleanup;
691 }
692 totalread += nread;
693 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000694 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695 /* Need a larger buffer to fit this line */
696 nfilled += nread;
697 buffersize *= 2;
698 if (buffersize > INT_MAX) {
699 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000700 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000701 goto error;
702 }
703 if (big_buffer == NULL) {
704 /* Create the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000705 big_buffer = PyString_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 NULL, buffersize);
707 if (big_buffer == NULL)
708 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000709 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000710 memcpy(buffer, small_buffer, nfilled);
711 }
712 else {
713 /* Grow the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000714 _PyString_Resize(&big_buffer, buffersize);
715 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000716 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000717 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000718 }
719 end = buffer+nfilled+nread;
720 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000721 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000722 /* Process complete lines */
723 p++;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000724 line = PyString_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000725 if (line == NULL)
726 goto error;
727 err = PyList_Append(list, line);
728 Py_DECREF(line);
729 if (err != 0)
730 goto error;
731 q = p;
732 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000733 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000734 /* Move the remaining incomplete line to the start */
735 nfilled = end-q;
736 memmove(buffer, q, nfilled);
737 if (sizehint > 0)
738 if (totalread >= (size_t)sizehint)
739 break;
740 if (shortread) {
741 sizehint = 0;
742 break;
743 }
744 }
745 if (nfilled != 0) {
746 /* Partial last line */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000747 line = PyString_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000748 if (line == NULL)
749 goto error;
750 if (sizehint > 0) {
751 /* Need to complete the last line */
752 PyObject *rest = Util_GetLine(self, 0);
753 if (rest == NULL) {
754 Py_DECREF(line);
755 goto error;
756 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000757 PyString_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000758 Py_DECREF(rest);
759 if (line == NULL)
760 goto error;
761 }
762 err = PyList_Append(list, line);
763 Py_DECREF(line);
764 if (err != 0)
765 goto error;
766 }
767
768 cleanup:
769 RELEASE_LOCK(self);
770 if (big_buffer) {
771 Py_DECREF(big_buffer);
772 }
773 return list;
774}
775
/* Docstring for BZ2File.xreadlines(); the method itself is not defined
 * next to this string. */
PyDoc_STRVAR(BZ2File_xreadlines__doc__,
"xreadlines() -> self\n\
\n\
For backward compatibility. BZ2File objects now include the performance\n\
optimizations previously implemented in the xreadlines module.\n\
");
782
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000783PyDoc_STRVAR(BZ2File_write__doc__,
784"write(data) -> None\n\
785\n\
786Write the 'data' string to file. Note that due to buffering, close() may\n\
787be needed before the file on disk reflects the data written.\n\
788");
789
790/* This is a hacked version of Python's fileobject.c:file_write(). */
791static PyObject *
792BZ2File_write(BZ2FileObject *self, PyObject *args)
793{
794 PyObject *ret = NULL;
795 char *buf;
796 int len;
797 int bzerror;
798
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000799 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000800 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000801
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000802 ACQUIRE_LOCK(self);
803 switch (self->mode) {
804 case MODE_WRITE:
805 break;
Tim Peterse3228092002-11-09 04:21:44 +0000806
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807 case MODE_CLOSED:
808 PyErr_SetString(PyExc_ValueError,
809 "I/O operation on closed file");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000810 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000811
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000812 default:
813 PyErr_SetString(PyExc_IOError,
814 "file is not ready for writing");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000815 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816 }
817
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000818 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000819
820 Py_BEGIN_ALLOW_THREADS
821 BZ2_bzWrite (&bzerror, self->fp, buf, len);
822 self->pos += len;
823 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000824
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000825 if (bzerror != BZ_OK) {
826 Util_CatchBZ2Error(bzerror);
827 goto cleanup;
828 }
Tim Peterse3228092002-11-09 04:21:44 +0000829
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000830 Py_INCREF(Py_None);
831 ret = Py_None;
832
833cleanup:
834 RELEASE_LOCK(self);
835 return ret;
836}
837
838PyDoc_STRVAR(BZ2File_writelines__doc__,
839"writelines(sequence_of_strings) -> None\n\
840\n\
841Write the sequence of strings to the file. Note that newlines are not\n\
842added. The sequence can be any iterable object producing strings. This is\n\
843equivalent to calling write() for each string.\n\
844");
845
846/* This is a hacked version of Python's fileobject.c:file_writelines(). */
847static PyObject *
848BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
849{
850#define CHUNKSIZE 1000
851 PyObject *list = NULL;
852 PyObject *iter = NULL;
853 PyObject *ret = NULL;
854 PyObject *line;
855 int i, j, index, len, islist;
856 int bzerror;
857
858 ACQUIRE_LOCK(self);
Georg Brandl3335a7a2006-08-14 21:42:55 +0000859 switch (self->mode) {
860 case MODE_WRITE:
861 break;
862
863 case MODE_CLOSED:
864 PyErr_SetString(PyExc_ValueError,
865 "I/O operation on closed file");
866 goto error;
867
868 default:
869 PyErr_SetString(PyExc_IOError,
870 "file is not ready for writing");
871 goto error;
872 }
873
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000874 islist = PyList_Check(seq);
875 if (!islist) {
876 iter = PyObject_GetIter(seq);
877 if (iter == NULL) {
878 PyErr_SetString(PyExc_TypeError,
879 "writelines() requires an iterable argument");
880 goto error;
881 }
882 list = PyList_New(CHUNKSIZE);
883 if (list == NULL)
884 goto error;
885 }
886
887 /* Strategy: slurp CHUNKSIZE lines into a private list,
888 checking that they are all strings, then write that list
889 without holding the interpreter lock, then come back for more. */
890 for (index = 0; ; index += CHUNKSIZE) {
891 if (islist) {
892 Py_XDECREF(list);
893 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
894 if (list == NULL)
895 goto error;
896 j = PyList_GET_SIZE(list);
897 }
898 else {
899 for (j = 0; j < CHUNKSIZE; j++) {
900 line = PyIter_Next(iter);
901 if (line == NULL) {
902 if (PyErr_Occurred())
903 goto error;
904 break;
905 }
906 PyList_SetItem(list, j, line);
907 }
908 }
909 if (j == 0)
910 break;
911
912 /* Check that all entries are indeed strings. If not,
913 apply the same rules as for file.write() and
914 convert the rets to strings. This is slow, but
915 seems to be the only way since all conversion APIs
916 could potentially execute Python code. */
917 for (i = 0; i < j; i++) {
918 PyObject *v = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000919 if (!PyString_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000920 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000921 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000922 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
923 PyErr_SetString(PyExc_TypeError,
924 "writelines() "
925 "argument must be "
926 "a sequence of "
927 "strings");
928 goto error;
929 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000930 line = PyString_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000931 len);
932 if (line == NULL)
933 goto error;
934 Py_DECREF(v);
935 PyList_SET_ITEM(list, i, line);
936 }
937 }
938
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000939 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000940
941 /* Since we are releasing the global lock, the
942 following code may *not* execute Python code. */
943 Py_BEGIN_ALLOW_THREADS
944 for (i = 0; i < j; i++) {
945 line = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000946 len = PyString_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000947 BZ2_bzWrite (&bzerror, self->fp,
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000948 PyString_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949 if (bzerror != BZ_OK) {
950 Py_BLOCK_THREADS
951 Util_CatchBZ2Error(bzerror);
952 goto error;
953 }
954 }
955 Py_END_ALLOW_THREADS
956
957 if (j < CHUNKSIZE)
958 break;
959 }
960
961 Py_INCREF(Py_None);
962 ret = Py_None;
963
964 error:
965 RELEASE_LOCK(self);
966 Py_XDECREF(list);
967 Py_XDECREF(iter);
968 return ret;
969#undef CHUNKSIZE
970}
971
972PyDoc_STRVAR(BZ2File_seek__doc__,
973"seek(offset [, whence]) -> None\n\
974\n\
975Move to new file position. Argument offset is a byte count. Optional\n\
976argument whence defaults to 0 (offset from start of file, offset\n\
977should be >= 0); other values are 1 (move relative to current position,\n\
978positive or negative), and 2 (move relative to end of file, usually\n\
979negative, although many platforms allow seeking beyond the end of a file).\n\
980\n\
981Note that seeking of bz2 files is emulated, and depending on the parameters\n\
982the operation may be extremely slow.\n\
983");
984
985static PyObject *
986BZ2File_seek(BZ2FileObject *self, PyObject *args)
987{
988 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000989 PyObject *offobj;
990 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000991 char small_buffer[SMALLCHUNK];
992 char *buffer = small_buffer;
993 size_t buffersize = SMALLCHUNK;
Andrew M. Kuchling44b054b2006-12-18 19:22:24 +0000994 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000995 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000996 int chunksize;
997 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000998 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000999
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001000 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1001 return NULL;
1002#if !defined(HAVE_LARGEFILE_SUPPORT)
1003 offset = PyInt_AsLong(offobj);
1004#else
1005 offset = PyLong_Check(offobj) ?
1006 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1007#endif
1008 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001009 return NULL;
1010
1011 ACQUIRE_LOCK(self);
1012 Util_DropReadAhead(self);
1013 switch (self->mode) {
1014 case MODE_READ:
1015 case MODE_READ_EOF:
1016 break;
Tim Peterse3228092002-11-09 04:21:44 +00001017
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001018 case MODE_CLOSED:
1019 PyErr_SetString(PyExc_ValueError,
1020 "I/O operation on closed file");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001021 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001022
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001023 default:
1024 PyErr_SetString(PyExc_IOError,
1025 "seek works only while reading");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001026 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001027 }
1028
Georg Brandl47fab922006-02-18 21:57:25 +00001029 if (where == 2) {
1030 if (self->size == -1) {
1031 assert(self->mode != MODE_READ_EOF);
1032 for (;;) {
1033 Py_BEGIN_ALLOW_THREADS
1034 chunksize = Util_UnivNewlineRead(
1035 &bzerror, self->fp,
1036 buffer, buffersize,
1037 self);
1038 self->pos += chunksize;
1039 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001040
Georg Brandl47fab922006-02-18 21:57:25 +00001041 bytesread += chunksize;
1042 if (bzerror == BZ_STREAM_END) {
1043 break;
1044 } else if (bzerror != BZ_OK) {
1045 Util_CatchBZ2Error(bzerror);
1046 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001047 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001048 }
Georg Brandl47fab922006-02-18 21:57:25 +00001049 self->mode = MODE_READ_EOF;
1050 self->size = self->pos;
1051 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001052 }
Georg Brandl47fab922006-02-18 21:57:25 +00001053 offset = self->size + offset;
1054 } else if (where == 1) {
1055 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 }
1057
Georg Brandl47fab922006-02-18 21:57:25 +00001058 /* Before getting here, offset must be the absolute position the file
1059 * pointer should be set to. */
1060
1061 if (offset >= self->pos) {
1062 /* we can move forward */
1063 offset -= self->pos;
1064 } else {
1065 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001066 BZ2_bzReadClose(&bzerror, self->fp);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001067 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001068 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001069 self->fp = NULL;
1070 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001071 if (bzerror != BZ_OK) {
1072 Util_CatchBZ2Error(bzerror);
1073 goto cleanup;
1074 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001075 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001076 if (!ret)
1077 goto cleanup;
1078 Py_DECREF(ret);
1079 ret = NULL;
1080 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001081 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001082 0, 0, NULL, 0);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001083 if (self->fp)
Gregory P. Smith73bee442008-04-12 20:37:48 +00001084 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001085 if (bzerror != BZ_OK) {
1086 Util_CatchBZ2Error(bzerror);
1087 goto cleanup;
1088 }
1089 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001090 }
1091
Georg Brandl47fab922006-02-18 21:57:25 +00001092 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001093 goto exit;
1094
1095 /* Before getting here, offset must be set to the number of bytes
1096 * to walk forward. */
1097 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001098 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001099 readsize = buffersize;
1100 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001101 /* offset might be wider that readsize, but the result
1102 * of the subtraction is bound by buffersize (see the
1103 * condition above). buffersize is 8192. */
1104 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001105 Py_BEGIN_ALLOW_THREADS
1106 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1107 buffer, readsize, self);
1108 self->pos += chunksize;
1109 Py_END_ALLOW_THREADS
1110 bytesread += chunksize;
1111 if (bzerror == BZ_STREAM_END) {
1112 self->size = self->pos;
1113 self->mode = MODE_READ_EOF;
1114 break;
1115 } else if (bzerror != BZ_OK) {
1116 Util_CatchBZ2Error(bzerror);
1117 goto cleanup;
1118 }
1119 if (bytesread == offset)
1120 break;
1121 }
1122
1123exit:
1124 Py_INCREF(Py_None);
1125 ret = Py_None;
1126
1127cleanup:
1128 RELEASE_LOCK(self);
1129 return ret;
1130}
1131
1132PyDoc_STRVAR(BZ2File_tell__doc__,
1133"tell() -> int\n\
1134\n\
1135Return the current file position, an integer (may be a long integer).\n\
1136");
1137
1138static PyObject *
1139BZ2File_tell(BZ2FileObject *self, PyObject *args)
1140{
1141 PyObject *ret = NULL;
1142
1143 if (self->mode == MODE_CLOSED) {
1144 PyErr_SetString(PyExc_ValueError,
1145 "I/O operation on closed file");
1146 goto cleanup;
1147 }
1148
Georg Brandla8bcecc2005-09-03 07:49:53 +00001149#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001150 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001151#else
1152 ret = PyLong_FromLongLong(self->pos);
1153#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154
1155cleanup:
1156 return ret;
1157}
1158
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001159PyDoc_STRVAR(BZ2File_close__doc__,
1160"close() -> None or (perhaps) an integer\n\
1161\n\
1162Close the file. Sets data attribute .closed to true. A closed file\n\
1163cannot be used for further I/O operations. close() may be called more\n\
1164than once without error.\n\
1165");
1166
1167static PyObject *
1168BZ2File_close(BZ2FileObject *self)
1169{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001170 PyObject *ret = NULL;
1171 int bzerror = BZ_OK;
1172
1173 ACQUIRE_LOCK(self);
1174 switch (self->mode) {
1175 case MODE_READ:
1176 case MODE_READ_EOF:
1177 BZ2_bzReadClose(&bzerror, self->fp);
1178 break;
1179 case MODE_WRITE:
1180 BZ2_bzWriteClose(&bzerror, self->fp,
1181 0, NULL, NULL);
1182 break;
1183 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001184 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001185 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001186 self->fp = NULL;
1187 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001188 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001189 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001190 if (bzerror != BZ_OK) {
1191 Util_CatchBZ2Error(bzerror);
1192 Py_XDECREF(ret);
1193 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001194 }
1195
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001196 RELEASE_LOCK(self);
1197 return ret;
1198}
1199
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001200static PyObject *BZ2File_getiter(BZ2FileObject *self);
1201
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001202static PyMethodDef BZ2File_methods[] = {
1203 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1204 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1205 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001206 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001207 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1208 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1209 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1210 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001211 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1212 {NULL, NULL} /* sentinel */
1213};
1214
1215
1216/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001217/* Getters and setters of BZ2File. */
1218
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001219/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1220static PyObject *
1221BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1222{
1223 switch (self->f_newlinetypes) {
1224 case NEWLINE_UNKNOWN:
1225 Py_INCREF(Py_None);
1226 return Py_None;
1227 case NEWLINE_CR:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001228 return PyString_FromString("\r");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001229 case NEWLINE_LF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001230 return PyString_FromString("\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001231 case NEWLINE_CR|NEWLINE_LF:
1232 return Py_BuildValue("(ss)", "\r", "\n");
1233 case NEWLINE_CRLF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001234 return PyString_FromString("\r\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001235 case NEWLINE_CR|NEWLINE_CRLF:
1236 return Py_BuildValue("(ss)", "\r", "\r\n");
1237 case NEWLINE_LF|NEWLINE_CRLF:
1238 return Py_BuildValue("(ss)", "\n", "\r\n");
1239 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1240 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1241 default:
1242 PyErr_Format(PyExc_SystemError,
1243 "Unknown newlines value 0x%x\n",
1244 self->f_newlinetypes);
1245 return NULL;
1246 }
1247}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001248
1249static PyObject *
1250BZ2File_get_closed(BZ2FileObject *self, void *closure)
1251{
1252 return PyInt_FromLong(self->mode == MODE_CLOSED);
1253}
1254
1255static PyObject *
1256BZ2File_get_mode(BZ2FileObject *self, void *closure)
1257{
1258 return PyObject_GetAttrString(self->file, "mode");
1259}
1260
1261static PyObject *
1262BZ2File_get_name(BZ2FileObject *self, void *closure)
1263{
1264 return PyObject_GetAttrString(self->file, "name");
1265}
1266
1267static PyGetSetDef BZ2File_getset[] = {
1268 {"closed", (getter)BZ2File_get_closed, NULL,
1269 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001270 {"newlines", (getter)BZ2File_get_newlines, NULL,
1271 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001272 {"mode", (getter)BZ2File_get_mode, NULL,
1273 "file mode ('r', 'w', or 'U')"},
1274 {"name", (getter)BZ2File_get_name, NULL,
1275 "file name"},
1276 {NULL} /* Sentinel */
1277};
1278
1279
1280/* ===================================================================== */
1281/* Members of BZ2File_Type. */
1282
1283#undef OFF
1284#define OFF(x) offsetof(BZ2FileObject, x)
1285
1286static PyMemberDef BZ2File_members[] = {
1287 {"softspace", T_INT, OFF(f_softspace), 0,
1288 "flag indicating that a space needs to be printed; used by print"},
1289 {NULL} /* Sentinel */
1290};
1291
1292/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001293/* Slot definitions for BZ2File_Type. */
1294
1295static int
1296BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1297{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001298 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001299 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001300 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001301 char *mode = "r";
1302 int buffering = -1;
1303 int compresslevel = 9;
1304 int bzerror;
1305 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001306
1307 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001308
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001309 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1310 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001311 &compresslevel))
1312 return -1;
1313
1314 if (compresslevel < 1 || compresslevel > 9) {
1315 PyErr_SetString(PyExc_ValueError,
1316 "compresslevel must be between 1 and 9");
1317 return -1;
1318 }
1319
1320 for (;;) {
1321 int error = 0;
1322 switch (*mode) {
1323 case 'r':
1324 case 'w':
1325 if (mode_char)
1326 error = 1;
1327 mode_char = *mode;
1328 break;
1329
1330 case 'b':
1331 break;
1332
1333 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001334#ifdef __VMS
1335 self->f_univ_newline = 0;
1336#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001337 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001338#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001339 break;
1340
1341 default:
1342 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001343 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001344 }
1345 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001346 PyErr_Format(PyExc_ValueError,
1347 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001348 return -1;
1349 }
1350 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001351 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001352 break;
1353 }
1354
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001355 if (mode_char == 0) {
1356 mode_char = 'r';
1357 }
1358
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001359 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001360
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001361 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1362 name, mode, buffering);
1363 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001364 return -1;
1365
1366 /* From now on, we have stuff to dealloc, so jump to error label
1367 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001368
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001369#ifdef WITH_THREAD
1370 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001371 if (!self->lock) {
1372 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001373 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001374 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001375#endif
1376
1377 if (mode_char == 'r')
1378 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001379 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001380 0, 0, NULL, 0);
1381 else
1382 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001383 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001384 compresslevel, 0, 0);
1385
1386 if (bzerror != BZ_OK) {
1387 Util_CatchBZ2Error(bzerror);
1388 goto error;
1389 }
Gregory P. Smith73bee442008-04-12 20:37:48 +00001390 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001391
1392 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1393
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001394 return 0;
1395
1396error:
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001397 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001398#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001399 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001400 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001401 self->lock = NULL;
1402 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001403#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001404 return -1;
1405}
1406
1407static void
1408BZ2File_dealloc(BZ2FileObject *self)
1409{
1410 int bzerror;
1411#ifdef WITH_THREAD
1412 if (self->lock)
1413 PyThread_free_lock(self->lock);
1414#endif
1415 switch (self->mode) {
1416 case MODE_READ:
1417 case MODE_READ_EOF:
1418 BZ2_bzReadClose(&bzerror, self->fp);
1419 break;
1420 case MODE_WRITE:
1421 BZ2_bzWriteClose(&bzerror, self->fp,
1422 0, NULL, NULL);
1423 break;
1424 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001425 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001426 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001427 self->fp = NULL;
1428 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001429 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001430 Py_XDECREF(self->file);
Christian Heimese93237d2007-12-19 02:37:44 +00001431 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001432}
1433
1434/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1435static PyObject *
1436BZ2File_getiter(BZ2FileObject *self)
1437{
1438 if (self->mode == MODE_CLOSED) {
1439 PyErr_SetString(PyExc_ValueError,
1440 "I/O operation on closed file");
1441 return NULL;
1442 }
1443 Py_INCREF((PyObject*)self);
1444 return (PyObject *)self;
1445}
1446
1447/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1448#define READAHEAD_BUFSIZE 8192
1449static PyObject *
1450BZ2File_iternext(BZ2FileObject *self)
1451{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001452 PyStringObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001453 ACQUIRE_LOCK(self);
1454 if (self->mode == MODE_CLOSED) {
Gregory P. Smith3b1e6b22008-07-07 04:31:58 +00001455 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001456 PyErr_SetString(PyExc_ValueError,
1457 "I/O operation on closed file");
1458 return NULL;
1459 }
1460 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1461 RELEASE_LOCK(self);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001462 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001463 Py_XDECREF(ret);
1464 return NULL;
1465 }
1466 return (PyObject *)ret;
1467}
1468
1469/* ===================================================================== */
1470/* BZ2File_Type definition. */
1471
1472PyDoc_VAR(BZ2File__doc__) =
1473PyDoc_STR(
1474"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1475\n\
1476Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1477writing. When opened for writing, the file will be created if it doesn't\n\
1478exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1479unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1480is given, must be a number between 1 and 9.\n\
1481")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001482PyDoc_STR(
1483"\n\
1484Add a 'U' to mode to open the file for input with universal newline\n\
1485support. Any line ending in the input file will be seen as a '\\n' in\n\
1486Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1487for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1488'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1489newlines are available only when reading.\n\
1490")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001491;
1492
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001493static PyTypeObject BZ2File_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001494 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001495 "bz2.BZ2File", /*tp_name*/
1496 sizeof(BZ2FileObject), /*tp_basicsize*/
1497 0, /*tp_itemsize*/
1498 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1499 0, /*tp_print*/
1500 0, /*tp_getattr*/
1501 0, /*tp_setattr*/
1502 0, /*tp_compare*/
1503 0, /*tp_repr*/
1504 0, /*tp_as_number*/
1505 0, /*tp_as_sequence*/
1506 0, /*tp_as_mapping*/
1507 0, /*tp_hash*/
1508 0, /*tp_call*/
1509 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001510 PyObject_GenericGetAttr,/*tp_getattro*/
1511 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001512 0, /*tp_as_buffer*/
1513 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1514 BZ2File__doc__, /*tp_doc*/
1515 0, /*tp_traverse*/
1516 0, /*tp_clear*/
1517 0, /*tp_richcompare*/
1518 0, /*tp_weaklistoffset*/
1519 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1520 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1521 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001522 BZ2File_members, /*tp_members*/
1523 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001524 0, /*tp_base*/
1525 0, /*tp_dict*/
1526 0, /*tp_descr_get*/
1527 0, /*tp_descr_set*/
1528 0, /*tp_dictoffset*/
1529 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001530 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001531 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001532 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001533 0, /*tp_is_gc*/
1534};
1535
1536
1537/* ===================================================================== */
1538/* Methods of BZ2Comp. */
1539
1540PyDoc_STRVAR(BZ2Comp_compress__doc__,
1541"compress(data) -> string\n\
1542\n\
1543Provide more data to the compressor object. It will return chunks of\n\
1544compressed data whenever possible. When you've finished providing data\n\
1545to compress, call the flush() method to finish the compression process,\n\
1546and return what is left in the internal buffers.\n\
1547");
1548
1549static PyObject *
1550BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1551{
1552 char *data;
1553 int datasize;
1554 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001555 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001556 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001557 bz_stream *bzs = &self->bzs;
1558 int bzerror;
1559
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001560 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001561 return NULL;
1562
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001563 if (datasize == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001564 return PyString_FromString("");
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001565
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001566 ACQUIRE_LOCK(self);
1567 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001568 PyErr_SetString(PyExc_ValueError,
1569 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001570 goto error;
1571 }
1572
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001573 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001574 if (!ret)
1575 goto error;
1576
1577 bzs->next_in = data;
1578 bzs->avail_in = datasize;
1579 bzs->next_out = BUF(ret);
1580 bzs->avail_out = bufsize;
1581
1582 totalout = BZS_TOTAL_OUT(bzs);
1583
1584 for (;;) {
1585 Py_BEGIN_ALLOW_THREADS
1586 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1587 Py_END_ALLOW_THREADS
1588 if (bzerror != BZ_RUN_OK) {
1589 Util_CatchBZ2Error(bzerror);
1590 goto error;
1591 }
Georg Brandla47337f2007-03-13 12:34:25 +00001592 if (bzs->avail_in == 0)
1593 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001594 if (bzs->avail_out == 0) {
1595 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001596 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001597 BZ2_bzCompressEnd(bzs);
1598 goto error;
1599 }
1600 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1601 - totalout);
1602 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001603 }
1604 }
1605
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001606 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001607
1608 RELEASE_LOCK(self);
1609 return ret;
1610
1611error:
1612 RELEASE_LOCK(self);
1613 Py_XDECREF(ret);
1614 return NULL;
1615}
1616
1617PyDoc_STRVAR(BZ2Comp_flush__doc__,
1618"flush() -> string\n\
1619\n\
1620Finish the compression process and return what is left in internal buffers.\n\
1621You must not use the compressor object after calling this method.\n\
1622");
1623
1624static PyObject *
1625BZ2Comp_flush(BZ2CompObject *self)
1626{
1627 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001628 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001629 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001630 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001631 int bzerror;
1632
1633 ACQUIRE_LOCK(self);
1634 if (!self->running) {
1635 PyErr_SetString(PyExc_ValueError, "object was already "
1636 "flushed");
1637 goto error;
1638 }
1639 self->running = 0;
1640
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001641 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001642 if (!ret)
1643 goto error;
1644
1645 bzs->next_out = BUF(ret);
1646 bzs->avail_out = bufsize;
1647
1648 totalout = BZS_TOTAL_OUT(bzs);
1649
1650 for (;;) {
1651 Py_BEGIN_ALLOW_THREADS
1652 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1653 Py_END_ALLOW_THREADS
1654 if (bzerror == BZ_STREAM_END) {
1655 break;
1656 } else if (bzerror != BZ_FINISH_OK) {
1657 Util_CatchBZ2Error(bzerror);
1658 goto error;
1659 }
1660 if (bzs->avail_out == 0) {
1661 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001662 if (_PyString_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001663 goto error;
1664 bzs->next_out = BUF(ret);
1665 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1666 - totalout);
1667 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1668 }
1669 }
1670
1671 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001672 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001673
1674 RELEASE_LOCK(self);
1675 return ret;
1676
1677error:
1678 RELEASE_LOCK(self);
1679 Py_XDECREF(ret);
1680 return NULL;
1681}
1682
1683static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001684 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1685 BZ2Comp_compress__doc__},
1686 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1687 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001688 {NULL, NULL} /* sentinel */
1689};
1690
1691
1692/* ===================================================================== */
1693/* Slot definitions for BZ2Comp_Type. */
1694
1695static int
1696BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1697{
1698 int compresslevel = 9;
1699 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001700 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001701
1702 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1703 kwlist, &compresslevel))
1704 return -1;
1705
1706 if (compresslevel < 1 || compresslevel > 9) {
1707 PyErr_SetString(PyExc_ValueError,
1708 "compresslevel must be between 1 and 9");
1709 goto error;
1710 }
1711
1712#ifdef WITH_THREAD
1713 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001714 if (!self->lock) {
1715 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001716 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001717 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001718#endif
1719
1720 memset(&self->bzs, 0, sizeof(bz_stream));
1721 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1722 if (bzerror != BZ_OK) {
1723 Util_CatchBZ2Error(bzerror);
1724 goto error;
1725 }
1726
1727 self->running = 1;
1728
1729 return 0;
1730error:
1731#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001732 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001733 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001734 self->lock = NULL;
1735 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001736#endif
1737 return -1;
1738}
1739
1740static void
1741BZ2Comp_dealloc(BZ2CompObject *self)
1742{
1743#ifdef WITH_THREAD
1744 if (self->lock)
1745 PyThread_free_lock(self->lock);
1746#endif
1747 BZ2_bzCompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001748 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001749}
1750
1751
1752/* ===================================================================== */
1753/* BZ2Comp_Type definition. */
1754
1755PyDoc_STRVAR(BZ2Comp__doc__,
1756"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1757\n\
1758Create a new compressor object. This object may be used to compress\n\
1759data sequentially. If you want to compress data in one shot, use the\n\
1760compress() function instead. The compresslevel parameter, if given,\n\
1761must be a number between 1 and 9.\n\
1762");
1763
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001764static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001765 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001766 "bz2.BZ2Compressor", /*tp_name*/
1767 sizeof(BZ2CompObject), /*tp_basicsize*/
1768 0, /*tp_itemsize*/
1769 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1770 0, /*tp_print*/
1771 0, /*tp_getattr*/
1772 0, /*tp_setattr*/
1773 0, /*tp_compare*/
1774 0, /*tp_repr*/
1775 0, /*tp_as_number*/
1776 0, /*tp_as_sequence*/
1777 0, /*tp_as_mapping*/
1778 0, /*tp_hash*/
1779 0, /*tp_call*/
1780 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001781 PyObject_GenericGetAttr,/*tp_getattro*/
1782 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001783 0, /*tp_as_buffer*/
1784 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1785 BZ2Comp__doc__, /*tp_doc*/
1786 0, /*tp_traverse*/
1787 0, /*tp_clear*/
1788 0, /*tp_richcompare*/
1789 0, /*tp_weaklistoffset*/
1790 0, /*tp_iter*/
1791 0, /*tp_iternext*/
1792 BZ2Comp_methods, /*tp_methods*/
1793 0, /*tp_members*/
1794 0, /*tp_getset*/
1795 0, /*tp_base*/
1796 0, /*tp_dict*/
1797 0, /*tp_descr_get*/
1798 0, /*tp_descr_set*/
1799 0, /*tp_dictoffset*/
1800 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001801 PyType_GenericAlloc, /*tp_alloc*/
1802 PyType_GenericNew, /*tp_new*/
1803 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001804 0, /*tp_is_gc*/
1805};
1806
1807
1808/* ===================================================================== */
1809/* Members of BZ2Decomp. */
1810
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001811#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001812#define OFF(x) offsetof(BZ2DecompObject, x)
1813
1814static PyMemberDef BZ2Decomp_members[] = {
1815 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1816 {NULL} /* Sentinel */
1817};
1818
1819
1820/* ===================================================================== */
1821/* Methods of BZ2Decomp. */
1822
1823PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1824"decompress(data) -> string\n\
1825\n\
1826Provide more data to the decompressor object. It will return chunks\n\
1827of decompressed data whenever possible. If you try to decompress data\n\
1828after the end of stream is found, EOFError will be raised. If any data\n\
1829was found after the end of stream, it'll be ignored and saved in\n\
1830unused_data attribute.\n\
1831");
1832
1833static PyObject *
1834BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1835{
1836 char *data;
1837 int datasize;
1838 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001839 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001840 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001841 bz_stream *bzs = &self->bzs;
1842 int bzerror;
1843
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001844 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001845 return NULL;
1846
1847 ACQUIRE_LOCK(self);
1848 if (!self->running) {
1849 PyErr_SetString(PyExc_EOFError, "end of stream was "
1850 "already found");
1851 goto error;
1852 }
1853
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001854 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001855 if (!ret)
1856 goto error;
1857
1858 bzs->next_in = data;
1859 bzs->avail_in = datasize;
1860 bzs->next_out = BUF(ret);
1861 bzs->avail_out = bufsize;
1862
1863 totalout = BZS_TOTAL_OUT(bzs);
1864
1865 for (;;) {
1866 Py_BEGIN_ALLOW_THREADS
1867 bzerror = BZ2_bzDecompress(bzs);
1868 Py_END_ALLOW_THREADS
1869 if (bzerror == BZ_STREAM_END) {
1870 if (bzs->avail_in != 0) {
1871 Py_DECREF(self->unused_data);
1872 self->unused_data =
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001873 PyString_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001874 bzs->avail_in);
1875 }
1876 self->running = 0;
1877 break;
1878 }
1879 if (bzerror != BZ_OK) {
1880 Util_CatchBZ2Error(bzerror);
1881 goto error;
1882 }
Georg Brandla47337f2007-03-13 12:34:25 +00001883 if (bzs->avail_in == 0)
1884 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001885 if (bzs->avail_out == 0) {
1886 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001887 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001888 BZ2_bzDecompressEnd(bzs);
1889 goto error;
1890 }
1891 bzs->next_out = BUF(ret);
1892 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1893 - totalout);
1894 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001895 }
1896 }
1897
1898 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001899 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001900
1901 RELEASE_LOCK(self);
1902 return ret;
1903
1904error:
1905 RELEASE_LOCK(self);
1906 Py_XDECREF(ret);
1907 return NULL;
1908}
1909
1910static PyMethodDef BZ2Decomp_methods[] = {
1911 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1912 {NULL, NULL} /* sentinel */
1913};
1914
1915
1916/* ===================================================================== */
1917/* Slot definitions for BZ2Decomp_Type. */
1918
1919static int
1920BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1921{
1922 int bzerror;
1923
1924 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1925 return -1;
1926
1927#ifdef WITH_THREAD
1928 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001929 if (!self->lock) {
1930 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001931 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001932 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001933#endif
1934
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001935 self->unused_data = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001936 if (!self->unused_data)
1937 goto error;
1938
1939 memset(&self->bzs, 0, sizeof(bz_stream));
1940 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1941 if (bzerror != BZ_OK) {
1942 Util_CatchBZ2Error(bzerror);
1943 goto error;
1944 }
1945
1946 self->running = 1;
1947
1948 return 0;
1949
1950error:
1951#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001952 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001953 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001954 self->lock = NULL;
1955 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001956#endif
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001957 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001958 return -1;
1959}
1960
1961static void
1962BZ2Decomp_dealloc(BZ2DecompObject *self)
1963{
1964#ifdef WITH_THREAD
1965 if (self->lock)
1966 PyThread_free_lock(self->lock);
1967#endif
1968 Py_XDECREF(self->unused_data);
1969 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001970 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001971}
1972
1973
1974/* ===================================================================== */
1975/* BZ2Decomp_Type definition. */
1976
1977PyDoc_STRVAR(BZ2Decomp__doc__,
1978"BZ2Decompressor() -> decompressor object\n\
1979\n\
1980Create a new decompressor object. This object may be used to decompress\n\
1981data sequentially. If you want to decompress data in one shot, use the\n\
1982decompress() function instead.\n\
1983");
1984
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001985static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001986 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001987 "bz2.BZ2Decompressor", /*tp_name*/
1988 sizeof(BZ2DecompObject), /*tp_basicsize*/
1989 0, /*tp_itemsize*/
1990 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1991 0, /*tp_print*/
1992 0, /*tp_getattr*/
1993 0, /*tp_setattr*/
1994 0, /*tp_compare*/
1995 0, /*tp_repr*/
1996 0, /*tp_as_number*/
1997 0, /*tp_as_sequence*/
1998 0, /*tp_as_mapping*/
1999 0, /*tp_hash*/
2000 0, /*tp_call*/
2001 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002002 PyObject_GenericGetAttr,/*tp_getattro*/
2003 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002004 0, /*tp_as_buffer*/
2005 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2006 BZ2Decomp__doc__, /*tp_doc*/
2007 0, /*tp_traverse*/
2008 0, /*tp_clear*/
2009 0, /*tp_richcompare*/
2010 0, /*tp_weaklistoffset*/
2011 0, /*tp_iter*/
2012 0, /*tp_iternext*/
2013 BZ2Decomp_methods, /*tp_methods*/
2014 BZ2Decomp_members, /*tp_members*/
2015 0, /*tp_getset*/
2016 0, /*tp_base*/
2017 0, /*tp_dict*/
2018 0, /*tp_descr_get*/
2019 0, /*tp_descr_set*/
2020 0, /*tp_dictoffset*/
2021 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002022 PyType_GenericAlloc, /*tp_alloc*/
2023 PyType_GenericNew, /*tp_new*/
2024 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002025 0, /*tp_is_gc*/
2026};
2027
2028
2029/* ===================================================================== */
2030/* Module functions. */
2031
2032PyDoc_STRVAR(bz2_compress__doc__,
2033"compress(data [, compresslevel=9]) -> string\n\
2034\n\
2035Compress data in one shot. If you want to compress data sequentially,\n\
2036use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2037given, must be a number between 1 and 9.\n\
2038");
2039
2040static PyObject *
2041bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2042{
2043 int compresslevel=9;
2044 char *data;
2045 int datasize;
2046 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002047 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002048 bz_stream _bzs;
2049 bz_stream *bzs = &_bzs;
2050 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002051 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002052
2053 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2054 kwlist, &data, &datasize,
2055 &compresslevel))
2056 return NULL;
2057
2058 if (compresslevel < 1 || compresslevel > 9) {
2059 PyErr_SetString(PyExc_ValueError,
2060 "compresslevel must be between 1 and 9");
2061 return NULL;
2062 }
2063
2064 /* Conforming to bz2 manual, this is large enough to fit compressed
2065 * data in one shot. We will check it later anyway. */
2066 bufsize = datasize + (datasize/100+1) + 600;
2067
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002068 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002069 if (!ret)
2070 return NULL;
2071
2072 memset(bzs, 0, sizeof(bz_stream));
2073
2074 bzs->next_in = data;
2075 bzs->avail_in = datasize;
2076 bzs->next_out = BUF(ret);
2077 bzs->avail_out = bufsize;
2078
2079 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2080 if (bzerror != BZ_OK) {
2081 Util_CatchBZ2Error(bzerror);
2082 Py_DECREF(ret);
2083 return NULL;
2084 }
Tim Peterse3228092002-11-09 04:21:44 +00002085
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002086 for (;;) {
2087 Py_BEGIN_ALLOW_THREADS
2088 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2089 Py_END_ALLOW_THREADS
2090 if (bzerror == BZ_STREAM_END) {
2091 break;
2092 } else if (bzerror != BZ_FINISH_OK) {
2093 BZ2_bzCompressEnd(bzs);
2094 Util_CatchBZ2Error(bzerror);
2095 Py_DECREF(ret);
2096 return NULL;
2097 }
2098 if (bzs->avail_out == 0) {
2099 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002100 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002101 BZ2_bzCompressEnd(bzs);
2102 Py_DECREF(ret);
2103 return NULL;
2104 }
2105 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2106 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2107 }
2108 }
2109
2110 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002111 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002112 BZ2_bzCompressEnd(bzs);
2113
2114 return ret;
2115}
2116
2117PyDoc_STRVAR(bz2_decompress__doc__,
2118"decompress(data) -> decompressed data\n\
2119\n\
2120Decompress data in one shot. If you want to decompress data sequentially,\n\
2121use an instance of BZ2Decompressor instead.\n\
2122");
2123
2124static PyObject *
2125bz2_decompress(PyObject *self, PyObject *args)
2126{
2127 char *data;
2128 int datasize;
2129 int bufsize = SMALLCHUNK;
2130 PyObject *ret;
2131 bz_stream _bzs;
2132 bz_stream *bzs = &_bzs;
2133 int bzerror;
2134
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002135 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002136 return NULL;
2137
2138 if (datasize == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002139 return PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002140
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002141 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002142 if (!ret)
2143 return NULL;
2144
2145 memset(bzs, 0, sizeof(bz_stream));
2146
2147 bzs->next_in = data;
2148 bzs->avail_in = datasize;
2149 bzs->next_out = BUF(ret);
2150 bzs->avail_out = bufsize;
2151
2152 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2153 if (bzerror != BZ_OK) {
2154 Util_CatchBZ2Error(bzerror);
2155 Py_DECREF(ret);
2156 return NULL;
2157 }
Tim Peterse3228092002-11-09 04:21:44 +00002158
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002159 for (;;) {
2160 Py_BEGIN_ALLOW_THREADS
2161 bzerror = BZ2_bzDecompress(bzs);
2162 Py_END_ALLOW_THREADS
2163 if (bzerror == BZ_STREAM_END) {
2164 break;
2165 } else if (bzerror != BZ_OK) {
2166 BZ2_bzDecompressEnd(bzs);
2167 Util_CatchBZ2Error(bzerror);
2168 Py_DECREF(ret);
2169 return NULL;
2170 }
Georg Brandla47337f2007-03-13 12:34:25 +00002171 if (bzs->avail_in == 0) {
2172 BZ2_bzDecompressEnd(bzs);
2173 PyErr_SetString(PyExc_ValueError,
2174 "couldn't find end of stream");
2175 Py_DECREF(ret);
2176 return NULL;
2177 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002178 if (bzs->avail_out == 0) {
2179 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002180 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002181 BZ2_bzDecompressEnd(bzs);
2182 Py_DECREF(ret);
2183 return NULL;
2184 }
2185 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2186 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002187 }
2188 }
2189
2190 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002191 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002192 BZ2_bzDecompressEnd(bzs);
2193
2194 return ret;
2195}
2196
2197static PyMethodDef bz2_methods[] = {
2198 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2199 bz2_compress__doc__},
2200 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2201 bz2_decompress__doc__},
2202 {NULL, NULL} /* sentinel */
2203};
2204
2205/* ===================================================================== */
2206/* Initialization function. */
2207
2208PyDoc_STRVAR(bz2__doc__,
2209"The python bz2 module provides a comprehensive interface for\n\
2210the bz2 compression library. It implements a complete file\n\
2211interface, one shot (de)compression functions, and types for\n\
2212sequential (de)compression.\n\
2213");
2214
Neal Norwitz21d896c2003-07-01 20:15:21 +00002215PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002216initbz2(void)
2217{
2218 PyObject *m;
2219
Christian Heimese93237d2007-12-19 02:37:44 +00002220 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2221 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2222 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002223
2224 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002225 if (m == NULL)
2226 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002227
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002228 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002229
2230 Py_INCREF(&BZ2File_Type);
2231 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2232
2233 Py_INCREF(&BZ2Comp_Type);
2234 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2235
2236 Py_INCREF(&BZ2Decomp_Type);
2237 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2238}