blob: 045d7b270c03d890741971465dcca907735ff468 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* Py_off_t: offset type used for stream positions and sizes in this module;
 * 64 bits wide when the platform provides such a type.
 * (Same selection logic as Objects/fileobject.c.) */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
/* Either large files were not requested, or off_t is already wide enough. */
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character buffer of a Python string object.  The argument is
 * parenthesized so that any expression can be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0   /* I/O on the object raises ValueError */
#define MODE_READ     1   /* open for reading, end of stream not yet seen */
#define MODE_READ_EOF 2   /* open for reading, BZ_STREAM_END already seen */
#define MODE_WRITE    3   /* open for writing */

/* Exact type check (subclasses do not match). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
#ifdef BZ_CONFIG_ERROR

/* bzlib headers that define BZ_CONFIG_ERROR expose the output byte count
 * as two 32-bit halves; combine them into the widest integer available.
 * The macro argument and the whole expansion are parenthesized so that
 * expressions such as BZS_TOTAL_OUT(&obj->bzs) expand correctly. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type: only the low half can be reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Headers without BZ_CONFIG_ERROR: map the BZ2_-prefixed names used in this
 * file onto the unprefixed functions.
 * NOTE(review): presumably an older bzlib API -- confirm. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object lock helpers.  They expand to nothing when Python is built
 * without thread support.  The object argument is parenthesized so any
 * pointer expression may be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0   /* No newline seen, yet */
#define NEWLINE_CR      1   /* \r newline seen */
#define NEWLINE_LF      2   /* \n newline seen */
#define NEWLINE_CRLF    4   /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
105 int f_softspace; /* Flag used by 'print' command */
106
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000110
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000111 BZFILE *fp;
112 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000113 Py_off_t pos;
114 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000115#ifdef WITH_THREAD
116 PyThread_type_lock lock;
117#endif
118} BZ2FileObject;
119
120typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2CompObject;
128
129typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134#ifdef WITH_THREAD
135 PyThread_type_lock lock;
136#endif
137} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* SMALLCHUNK: linear growth step for small buffers, never below 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK: linear growth step once a buffer has become large. */
#if SIZEOF_INT >= 4
#define BIGCHUNK  (512 * 1024)
#else
#define BIGCHUNK  (512 * 32)
#endif

/* Next buffer size to try after `currentsize` proved too small
 * (adapted from fileobject.c:new_buffersize()).
 *
 *   currentsize <= SMALLCHUNK              -> grow by SMALLCHUNK
 *   SMALLCHUNK < currentsize <= BIGCHUNK   -> double
 *   currentsize > BIGCHUNK                 -> grow by BIGCHUNK
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;
    return currentsize + growth;
}
226
227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000238 int bytes_read;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000239 int newlinetypes = f->f_newlinetypes;
240 int skipnextlf = f->f_skipnextlf;
241 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000242
243 total_v_size = n > 0 ? n : 100;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000244 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000245 if (v == NULL)
246 return NULL;
247
248 buf = BUF(v);
249 end = buf + total_v_size;
250
251 for (;;) {
252 Py_BEGIN_ALLOW_THREADS
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000253 while (buf != end) {
254 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
256 if (bytes_read == 0) break;
257 if (univ_newline) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000261 /* Seeing a \n here with skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262 * saw a \r before.
263 */
264 newlinetypes |= NEWLINE_CRLF;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000265 if (bzerror != BZ_OK) break;
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
267 f->pos++;
268 if (bytes_read == 0) break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000269 } else {
270 newlinetypes |= NEWLINE_CR;
271 }
272 }
273 if (c == '\r') {
274 skipnextlf = 1;
275 c = '\n';
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000276 } else if (c == '\n')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000277 newlinetypes |= NEWLINE_LF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000278 }
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000279 *buf++ = c;
280 if (bzerror != BZ_OK || c == '\n') break;
281 }
282 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000285 f->f_newlinetypes = newlinetypes;
286 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000287 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000288 f->size = f->pos;
289 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000290 break;
291 } else if (bzerror != BZ_OK) {
292 Util_CatchBZ2Error(bzerror);
293 Py_DECREF(v);
294 return NULL;
295 }
296 if (c == '\n')
297 break;
298 /* Must be because buf == end */
299 if (n > 0)
300 break;
301 used_v_size = total_v_size;
302 increment = total_v_size >> 2; /* mild exponential growth */
303 total_v_size += increment;
304 if (total_v_size > INT_MAX) {
305 PyErr_SetString(PyExc_OverflowError,
306 "line is longer than a Python string can hold");
307 Py_DECREF(v);
308 return NULL;
309 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000310 if (_PyString_Resize(&v, total_v_size) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000311 return NULL;
312 buf = BUF(v) + used_v_size;
313 end = BUF(v) + total_v_size;
314 }
315
316 used_v_size = buf - BUF(v);
317 if (used_v_size != total_v_size)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000318 _PyString_Resize(&v, used_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000319 return v;
320}
321
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000322/* This is a hacked version of Python's
323 * fileobject.c:Py_UniversalNewlineFread(). */
324size_t
325Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000326 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327{
328 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000329 int newlinetypes, skipnextlf;
330
331 assert(buf != NULL);
332 assert(stream != NULL);
333
334 if (!f->f_univ_newline)
335 return BZ2_bzRead(bzerror, stream, buf, n);
336
337 newlinetypes = f->f_newlinetypes;
338 skipnextlf = f->f_skipnextlf;
339
340 /* Invariant: n is the number of bytes remaining to be filled
341 * in the buffer.
342 */
343 while (n) {
344 size_t nread;
345 int shortread;
346 char *src = dst;
347
348 nread = BZ2_bzRead(bzerror, stream, dst, n);
349 assert(nread <= n);
350 n -= nread; /* assuming 1 byte out for each in; will adjust */
351 shortread = n != 0; /* true iff EOF or error */
352 while (nread--) {
353 char c = *src++;
354 if (c == '\r') {
355 /* Save as LF and set flag to skip next LF. */
356 *dst++ = '\n';
357 skipnextlf = 1;
358 }
359 else if (skipnextlf && c == '\n') {
360 /* Skip LF, and remember we saw CR LF. */
361 skipnextlf = 0;
362 newlinetypes |= NEWLINE_CRLF;
363 ++n;
364 }
365 else {
366 /* Normal char to be stored in buffer. Also
367 * update the newlinetypes flag if either this
368 * is an LF or the previous char was a CR.
369 */
370 if (c == '\n')
371 newlinetypes |= NEWLINE_LF;
372 else if (skipnextlf)
373 newlinetypes |= NEWLINE_CR;
374 *dst++ = c;
375 skipnextlf = 0;
376 }
377 }
378 if (shortread) {
379 /* If this is EOF, update type flags. */
380 if (skipnextlf && *bzerror == BZ_STREAM_END)
381 newlinetypes |= NEWLINE_CR;
382 break;
383 }
384 }
385 f->f_newlinetypes = newlinetypes;
386 f->f_skipnextlf = skipnextlf;
387 return dst - buf;
388}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000389
390/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
391static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000392Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000393{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000394 if (f->f_buf != NULL) {
395 PyMem_Free(f->f_buf);
396 f->f_buf = NULL;
397 }
398}
399
400/* This is a hacked version of Python's fileobject.c:readahead(). */
401static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000402Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403{
404 int chunksize;
405 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000406
407 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000408 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409 return 0;
410 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000411 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000413 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000414 f->f_bufptr = f->f_buf;
415 f->f_bufend = f->f_buf;
416 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000417 }
418 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
Antoine Pitrou016b3662008-08-09 17:22:25 +0000419 PyErr_NoMemory();
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000420 return -1;
421 }
422 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
424 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000425 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000426 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 f->size = f->pos;
429 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 } else if (bzerror != BZ_OK) {
431 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000432 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000433 return -1;
434 }
435 f->f_bufptr = f->f_buf;
436 f->f_bufend = f->f_buf + chunksize;
437 return 0;
438}
439
440/* This is a hacked version of Python's
441 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000442static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000443Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000444{
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000445 PyStringObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000446 char *bufptr;
447 char *buf;
448 int len;
449
450 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000451 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000452 return NULL;
453
454 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000455 if (len == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000456 return (PyStringObject *)
457 PyString_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000458 bufptr = memchr(f->f_bufptr, '\n', len);
459 if (bufptr != NULL) {
460 bufptr++; /* Count the '\n' */
461 len = bufptr - f->f_bufptr;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000462 s = (PyStringObject *)
463 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000464 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000465 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000466 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 f->f_bufptr = bufptr;
468 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000469 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 } else {
471 bufptr = f->f_bufptr;
472 buf = f->f_buf;
473 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 s = Util_ReadAheadGetLineSkip(f, skip+len,
475 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000476 if (s == NULL) {
477 PyMem_Free(buf);
478 return NULL;
479 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000480 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 PyMem_Free(buf);
482 }
483 return s;
484}
485
486/* ===================================================================== */
487/* Methods of BZ2File. */
488
489PyDoc_STRVAR(BZ2File_read__doc__,
490"read([size]) -> string\n\
491\n\
492Read at most size uncompressed bytes, returned as a string. If the size\n\
493argument is negative or omitted, read until EOF is reached.\n\
494");
495
496/* This is a hacked version of Python's fileobject.c:file_read(). */
497static PyObject *
498BZ2File_read(BZ2FileObject *self, PyObject *args)
499{
500 long bytesrequested = -1;
501 size_t bytesread, buffersize, chunksize;
502 int bzerror;
503 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000504
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000505 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
506 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000507
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000508 ACQUIRE_LOCK(self);
509 switch (self->mode) {
510 case MODE_READ:
511 break;
512 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000513 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000514 goto cleanup;
515 case MODE_CLOSED:
516 PyErr_SetString(PyExc_ValueError,
517 "I/O operation on closed file");
518 goto cleanup;
519 default:
520 PyErr_SetString(PyExc_IOError,
521 "file is not ready for reading");
522 goto cleanup;
523 }
524
525 if (bytesrequested < 0)
526 buffersize = Util_NewBufferSize((size_t)0);
527 else
528 buffersize = bytesrequested;
529 if (buffersize > INT_MAX) {
530 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000531 "requested number of bytes is "
532 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000533 goto cleanup;
534 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000535 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000536 if (ret == NULL)
537 goto cleanup;
538 bytesread = 0;
539
540 for (;;) {
541 Py_BEGIN_ALLOW_THREADS
542 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
543 BUF(ret)+bytesread,
544 buffersize-bytesread,
545 self);
546 self->pos += chunksize;
547 Py_END_ALLOW_THREADS
548 bytesread += chunksize;
549 if (bzerror == BZ_STREAM_END) {
550 self->size = self->pos;
551 self->mode = MODE_READ_EOF;
552 break;
553 } else if (bzerror != BZ_OK) {
554 Util_CatchBZ2Error(bzerror);
555 Py_DECREF(ret);
556 ret = NULL;
557 goto cleanup;
558 }
559 if (bytesrequested < 0) {
560 buffersize = Util_NewBufferSize(buffersize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000561 if (_PyString_Resize(&ret, buffersize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000562 goto cleanup;
563 } else {
564 break;
565 }
566 }
567 if (bytesread != buffersize)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000568 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000569
570cleanup:
571 RELEASE_LOCK(self);
572 return ret;
573}
574
575PyDoc_STRVAR(BZ2File_readline__doc__,
576"readline([size]) -> string\n\
577\n\
578Return the next line from the file, as a string, retaining newline.\n\
579A non-negative size argument will limit the maximum number of bytes to\n\
580return (an incomplete line may be returned then). Return an empty\n\
581string at EOF.\n\
582");
583
584static PyObject *
585BZ2File_readline(BZ2FileObject *self, PyObject *args)
586{
587 PyObject *ret = NULL;
588 int sizehint = -1;
589
590 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
591 return NULL;
592
593 ACQUIRE_LOCK(self);
594 switch (self->mode) {
595 case MODE_READ:
596 break;
597 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000598 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000599 goto cleanup;
600 case MODE_CLOSED:
601 PyErr_SetString(PyExc_ValueError,
602 "I/O operation on closed file");
603 goto cleanup;
604 default:
605 PyErr_SetString(PyExc_IOError,
606 "file is not ready for reading");
607 goto cleanup;
608 }
609
610 if (sizehint == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000611 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000612 else
613 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
614
615cleanup:
616 RELEASE_LOCK(self);
617 return ret;
618}
619
620PyDoc_STRVAR(BZ2File_readlines__doc__,
621"readlines([size]) -> list\n\
622\n\
623Call readline() repeatedly and return a list of lines read.\n\
624The optional size argument, if given, is an approximate bound on the\n\
625total number of bytes in the lines returned.\n\
626");
627
628/* This is a hacked version of Python's fileobject.c:file_readlines(). */
629static PyObject *
630BZ2File_readlines(BZ2FileObject *self, PyObject *args)
631{
632 long sizehint = 0;
633 PyObject *list = NULL;
634 PyObject *line;
635 char small_buffer[SMALLCHUNK];
636 char *buffer = small_buffer;
637 size_t buffersize = SMALLCHUNK;
638 PyObject *big_buffer = NULL;
639 size_t nfilled = 0;
640 size_t nread;
641 size_t totalread = 0;
642 char *p, *q, *end;
643 int err;
644 int shortread = 0;
645 int bzerror;
646
647 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
648 return NULL;
649
650 ACQUIRE_LOCK(self);
651 switch (self->mode) {
652 case MODE_READ:
653 break;
654 case MODE_READ_EOF:
655 list = PyList_New(0);
656 goto cleanup;
657 case MODE_CLOSED:
658 PyErr_SetString(PyExc_ValueError,
659 "I/O operation on closed file");
660 goto cleanup;
661 default:
662 PyErr_SetString(PyExc_IOError,
663 "file is not ready for reading");
664 goto cleanup;
665 }
666
667 if ((list = PyList_New(0)) == NULL)
668 goto cleanup;
669
670 for (;;) {
671 Py_BEGIN_ALLOW_THREADS
672 nread = Util_UnivNewlineRead(&bzerror, self->fp,
673 buffer+nfilled,
674 buffersize-nfilled, self);
675 self->pos += nread;
676 Py_END_ALLOW_THREADS
677 if (bzerror == BZ_STREAM_END) {
678 self->size = self->pos;
679 self->mode = MODE_READ_EOF;
680 if (nread == 0) {
681 sizehint = 0;
682 break;
683 }
684 shortread = 1;
685 } else if (bzerror != BZ_OK) {
686 Util_CatchBZ2Error(bzerror);
687 error:
688 Py_DECREF(list);
689 list = NULL;
690 goto cleanup;
691 }
692 totalread += nread;
693 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000694 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695 /* Need a larger buffer to fit this line */
696 nfilled += nread;
697 buffersize *= 2;
698 if (buffersize > INT_MAX) {
699 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000700 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000701 goto error;
702 }
703 if (big_buffer == NULL) {
704 /* Create the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000705 big_buffer = PyString_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 NULL, buffersize);
707 if (big_buffer == NULL)
708 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000709 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000710 memcpy(buffer, small_buffer, nfilled);
711 }
712 else {
713 /* Grow the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000714 _PyString_Resize(&big_buffer, buffersize);
715 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000716 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000717 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000718 }
719 end = buffer+nfilled+nread;
720 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000721 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000722 /* Process complete lines */
723 p++;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000724 line = PyString_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000725 if (line == NULL)
726 goto error;
727 err = PyList_Append(list, line);
728 Py_DECREF(line);
729 if (err != 0)
730 goto error;
731 q = p;
732 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000733 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000734 /* Move the remaining incomplete line to the start */
735 nfilled = end-q;
736 memmove(buffer, q, nfilled);
737 if (sizehint > 0)
738 if (totalread >= (size_t)sizehint)
739 break;
740 if (shortread) {
741 sizehint = 0;
742 break;
743 }
744 }
745 if (nfilled != 0) {
746 /* Partial last line */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000747 line = PyString_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000748 if (line == NULL)
749 goto error;
750 if (sizehint > 0) {
751 /* Need to complete the last line */
752 PyObject *rest = Util_GetLine(self, 0);
753 if (rest == NULL) {
754 Py_DECREF(line);
755 goto error;
756 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000757 PyString_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000758 Py_DECREF(rest);
759 if (line == NULL)
760 goto error;
761 }
762 err = PyList_Append(list, line);
763 Py_DECREF(line);
764 if (err != 0)
765 goto error;
766 }
767
768 cleanup:
769 RELEASE_LOCK(self);
770 if (big_buffer) {
771 Py_DECREF(big_buffer);
772 }
773 return list;
774}
775
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000776PyDoc_STRVAR(BZ2File_xreadlines__doc__,
777"xreadlines() -> self\n\
778\n\
779For backward compatibility. BZ2File objects now include the performance\n\
780optimizations previously implemented in the xreadlines module.\n\
781");
782
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000783PyDoc_STRVAR(BZ2File_write__doc__,
784"write(data) -> None\n\
785\n\
786Write the 'data' string to file. Note that due to buffering, close() may\n\
787be needed before the file on disk reflects the data written.\n\
788");
789
790/* This is a hacked version of Python's fileobject.c:file_write(). */
791static PyObject *
792BZ2File_write(BZ2FileObject *self, PyObject *args)
793{
794 PyObject *ret = NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000795 Py_buffer pbuf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000796 char *buf;
797 int len;
798 int bzerror;
799
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000800 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000801 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000802 buf = pbuf.buf;
803 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000804
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 ACQUIRE_LOCK(self);
806 switch (self->mode) {
807 case MODE_WRITE:
808 break;
Tim Peterse3228092002-11-09 04:21:44 +0000809
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810 case MODE_CLOSED:
811 PyErr_SetString(PyExc_ValueError,
812 "I/O operation on closed file");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000813 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000814
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000815 default:
816 PyErr_SetString(PyExc_IOError,
817 "file is not ready for writing");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000818 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000819 }
820
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000821 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000822
823 Py_BEGIN_ALLOW_THREADS
824 BZ2_bzWrite (&bzerror, self->fp, buf, len);
825 self->pos += len;
826 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000827
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000828 if (bzerror != BZ_OK) {
829 Util_CatchBZ2Error(bzerror);
830 goto cleanup;
831 }
Tim Peterse3228092002-11-09 04:21:44 +0000832
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000833 Py_INCREF(Py_None);
834 ret = Py_None;
835
836cleanup:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000837 PyBuffer_Release(&pbuf);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000838 RELEASE_LOCK(self);
839 return ret;
840}
841
842PyDoc_STRVAR(BZ2File_writelines__doc__,
843"writelines(sequence_of_strings) -> None\n\
844\n\
845Write the sequence of strings to the file. Note that newlines are not\n\
846added. The sequence can be any iterable object producing strings. This is\n\
847equivalent to calling write() for each string.\n\
848");
849
850/* This is a hacked version of Python's fileobject.c:file_writelines(). */
851static PyObject *
852BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
853{
854#define CHUNKSIZE 1000
855 PyObject *list = NULL;
856 PyObject *iter = NULL;
857 PyObject *ret = NULL;
858 PyObject *line;
859 int i, j, index, len, islist;
860 int bzerror;
861
862 ACQUIRE_LOCK(self);
Georg Brandl3335a7a2006-08-14 21:42:55 +0000863 switch (self->mode) {
864 case MODE_WRITE:
865 break;
866
867 case MODE_CLOSED:
868 PyErr_SetString(PyExc_ValueError,
869 "I/O operation on closed file");
870 goto error;
871
872 default:
873 PyErr_SetString(PyExc_IOError,
874 "file is not ready for writing");
875 goto error;
876 }
877
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000878 islist = PyList_Check(seq);
879 if (!islist) {
880 iter = PyObject_GetIter(seq);
881 if (iter == NULL) {
882 PyErr_SetString(PyExc_TypeError,
883 "writelines() requires an iterable argument");
884 goto error;
885 }
886 list = PyList_New(CHUNKSIZE);
887 if (list == NULL)
888 goto error;
889 }
890
891 /* Strategy: slurp CHUNKSIZE lines into a private list,
892 checking that they are all strings, then write that list
893 without holding the interpreter lock, then come back for more. */
894 for (index = 0; ; index += CHUNKSIZE) {
895 if (islist) {
896 Py_XDECREF(list);
897 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
898 if (list == NULL)
899 goto error;
900 j = PyList_GET_SIZE(list);
901 }
902 else {
903 for (j = 0; j < CHUNKSIZE; j++) {
904 line = PyIter_Next(iter);
905 if (line == NULL) {
906 if (PyErr_Occurred())
907 goto error;
908 break;
909 }
910 PyList_SetItem(list, j, line);
911 }
912 }
913 if (j == 0)
914 break;
915
916 /* Check that all entries are indeed strings. If not,
917 apply the same rules as for file.write() and
918 convert the rets to strings. This is slow, but
919 seems to be the only way since all conversion APIs
920 could potentially execute Python code. */
921 for (i = 0; i < j; i++) {
922 PyObject *v = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000923 if (!PyString_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000924 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000925 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000926 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
927 PyErr_SetString(PyExc_TypeError,
928 "writelines() "
929 "argument must be "
930 "a sequence of "
931 "strings");
932 goto error;
933 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000934 line = PyString_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000935 len);
936 if (line == NULL)
937 goto error;
938 Py_DECREF(v);
939 PyList_SET_ITEM(list, i, line);
940 }
941 }
942
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000943 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000944
945 /* Since we are releasing the global lock, the
946 following code may *not* execute Python code. */
947 Py_BEGIN_ALLOW_THREADS
948 for (i = 0; i < j; i++) {
949 line = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000950 len = PyString_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000951 BZ2_bzWrite (&bzerror, self->fp,
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000952 PyString_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000953 if (bzerror != BZ_OK) {
954 Py_BLOCK_THREADS
955 Util_CatchBZ2Error(bzerror);
956 goto error;
957 }
958 }
959 Py_END_ALLOW_THREADS
960
961 if (j < CHUNKSIZE)
962 break;
963 }
964
965 Py_INCREF(Py_None);
966 ret = Py_None;
967
968 error:
969 RELEASE_LOCK(self);
970 Py_XDECREF(list);
971 Py_XDECREF(iter);
972 return ret;
973#undef CHUNKSIZE
974}
975
976PyDoc_STRVAR(BZ2File_seek__doc__,
977"seek(offset [, whence]) -> None\n\
978\n\
979Move to new file position. Argument offset is a byte count. Optional\n\
980argument whence defaults to 0 (offset from start of file, offset\n\
981should be >= 0); other values are 1 (move relative to current position,\n\
982positive or negative), and 2 (move relative to end of file, usually\n\
983negative, although many platforms allow seeking beyond the end of a file).\n\
984\n\
985Note that seeking of bz2 files is emulated, and depending on the parameters\n\
986the operation may be extremely slow.\n\
987");
988
989static PyObject *
990BZ2File_seek(BZ2FileObject *self, PyObject *args)
991{
992 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000993 PyObject *offobj;
994 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000995 char small_buffer[SMALLCHUNK];
996 char *buffer = small_buffer;
997 size_t buffersize = SMALLCHUNK;
Andrew M. Kuchling44b054b2006-12-18 19:22:24 +0000998 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000999 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 int chunksize;
1001 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001002 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001003
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001004 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1005 return NULL;
1006#if !defined(HAVE_LARGEFILE_SUPPORT)
1007 offset = PyInt_AsLong(offobj);
1008#else
1009 offset = PyLong_Check(offobj) ?
1010 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1011#endif
1012 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001013 return NULL;
1014
1015 ACQUIRE_LOCK(self);
1016 Util_DropReadAhead(self);
1017 switch (self->mode) {
1018 case MODE_READ:
1019 case MODE_READ_EOF:
1020 break;
Tim Peterse3228092002-11-09 04:21:44 +00001021
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001022 case MODE_CLOSED:
1023 PyErr_SetString(PyExc_ValueError,
1024 "I/O operation on closed file");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001025 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001026
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001027 default:
1028 PyErr_SetString(PyExc_IOError,
1029 "seek works only while reading");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001030 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001031 }
1032
Georg Brandl47fab922006-02-18 21:57:25 +00001033 if (where == 2) {
1034 if (self->size == -1) {
1035 assert(self->mode != MODE_READ_EOF);
1036 for (;;) {
1037 Py_BEGIN_ALLOW_THREADS
1038 chunksize = Util_UnivNewlineRead(
1039 &bzerror, self->fp,
1040 buffer, buffersize,
1041 self);
1042 self->pos += chunksize;
1043 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001044
Georg Brandl47fab922006-02-18 21:57:25 +00001045 bytesread += chunksize;
1046 if (bzerror == BZ_STREAM_END) {
1047 break;
1048 } else if (bzerror != BZ_OK) {
1049 Util_CatchBZ2Error(bzerror);
1050 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001051 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001052 }
Georg Brandl47fab922006-02-18 21:57:25 +00001053 self->mode = MODE_READ_EOF;
1054 self->size = self->pos;
1055 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 }
Georg Brandl47fab922006-02-18 21:57:25 +00001057 offset = self->size + offset;
1058 } else if (where == 1) {
1059 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001060 }
1061
Georg Brandl47fab922006-02-18 21:57:25 +00001062 /* Before getting here, offset must be the absolute position the file
1063 * pointer should be set to. */
1064
1065 if (offset >= self->pos) {
1066 /* we can move forward */
1067 offset -= self->pos;
1068 } else {
1069 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001070 BZ2_bzReadClose(&bzerror, self->fp);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001071 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001072 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001073 self->fp = NULL;
1074 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001075 if (bzerror != BZ_OK) {
1076 Util_CatchBZ2Error(bzerror);
1077 goto cleanup;
1078 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001079 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001080 if (!ret)
1081 goto cleanup;
1082 Py_DECREF(ret);
1083 ret = NULL;
1084 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001085 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001086 0, 0, NULL, 0);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001087 if (self->fp)
Gregory P. Smith73bee442008-04-12 20:37:48 +00001088 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001089 if (bzerror != BZ_OK) {
1090 Util_CatchBZ2Error(bzerror);
1091 goto cleanup;
1092 }
1093 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001094 }
1095
Georg Brandl47fab922006-02-18 21:57:25 +00001096 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001097 goto exit;
1098
1099 /* Before getting here, offset must be set to the number of bytes
1100 * to walk forward. */
1101 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001102 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001103 readsize = buffersize;
1104 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001105 /* offset might be wider that readsize, but the result
1106 * of the subtraction is bound by buffersize (see the
1107 * condition above). buffersize is 8192. */
1108 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001109 Py_BEGIN_ALLOW_THREADS
1110 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1111 buffer, readsize, self);
1112 self->pos += chunksize;
1113 Py_END_ALLOW_THREADS
1114 bytesread += chunksize;
1115 if (bzerror == BZ_STREAM_END) {
1116 self->size = self->pos;
1117 self->mode = MODE_READ_EOF;
1118 break;
1119 } else if (bzerror != BZ_OK) {
1120 Util_CatchBZ2Error(bzerror);
1121 goto cleanup;
1122 }
1123 if (bytesread == offset)
1124 break;
1125 }
1126
1127exit:
1128 Py_INCREF(Py_None);
1129 ret = Py_None;
1130
1131cleanup:
1132 RELEASE_LOCK(self);
1133 return ret;
1134}
1135
1136PyDoc_STRVAR(BZ2File_tell__doc__,
1137"tell() -> int\n\
1138\n\
1139Return the current file position, an integer (may be a long integer).\n\
1140");
1141
1142static PyObject *
1143BZ2File_tell(BZ2FileObject *self, PyObject *args)
1144{
1145 PyObject *ret = NULL;
1146
1147 if (self->mode == MODE_CLOSED) {
1148 PyErr_SetString(PyExc_ValueError,
1149 "I/O operation on closed file");
1150 goto cleanup;
1151 }
1152
Georg Brandla8bcecc2005-09-03 07:49:53 +00001153#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001155#else
1156 ret = PyLong_FromLongLong(self->pos);
1157#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001158
1159cleanup:
1160 return ret;
1161}
1162
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163PyDoc_STRVAR(BZ2File_close__doc__,
1164"close() -> None or (perhaps) an integer\n\
1165\n\
1166Close the file. Sets data attribute .closed to true. A closed file\n\
1167cannot be used for further I/O operations. close() may be called more\n\
1168than once without error.\n\
1169");
1170
1171static PyObject *
1172BZ2File_close(BZ2FileObject *self)
1173{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001174 PyObject *ret = NULL;
1175 int bzerror = BZ_OK;
1176
1177 ACQUIRE_LOCK(self);
1178 switch (self->mode) {
1179 case MODE_READ:
1180 case MODE_READ_EOF:
1181 BZ2_bzReadClose(&bzerror, self->fp);
1182 break;
1183 case MODE_WRITE:
1184 BZ2_bzWriteClose(&bzerror, self->fp,
1185 0, NULL, NULL);
1186 break;
1187 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001188 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001189 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001190 self->fp = NULL;
1191 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001192 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001193 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001194 if (bzerror != BZ_OK) {
1195 Util_CatchBZ2Error(bzerror);
1196 Py_XDECREF(ret);
1197 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001198 }
1199
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001200 RELEASE_LOCK(self);
1201 return ret;
1202}
1203
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001204PyDoc_STRVAR(BZ2File_enter_doc,
1205"__enter__() -> self.");
1206
1207static PyObject *
1208BZ2File_enter(BZ2FileObject *self)
1209{
1210 if (self->mode == MODE_CLOSED) {
1211 PyErr_SetString(PyExc_ValueError,
1212 "I/O operation on closed file");
1213 return NULL;
1214 }
1215 Py_INCREF(self);
1216 return (PyObject *) self;
1217}
1218
1219PyDoc_STRVAR(BZ2File_exit_doc,
1220"__exit__(*excinfo) -> None. Closes the file.");
1221
1222static PyObject *
1223BZ2File_exit(BZ2FileObject *self, PyObject *args)
1224{
1225 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1226 if (!ret)
1227 /* If error occurred, pass through */
1228 return NULL;
1229 Py_DECREF(ret);
1230 Py_RETURN_NONE;
1231}
1232
1233
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001234static PyObject *BZ2File_getiter(BZ2FileObject *self);
1235
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001236static PyMethodDef BZ2File_methods[] = {
1237 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1238 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1239 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001240 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001241 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1242 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1243 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1244 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001245 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001246 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1247 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001248 {NULL, NULL} /* sentinel */
1249};
1250
1251
1252/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001253/* Getters and setters of BZ2File. */
1254
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001255/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1256static PyObject *
1257BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1258{
1259 switch (self->f_newlinetypes) {
1260 case NEWLINE_UNKNOWN:
1261 Py_INCREF(Py_None);
1262 return Py_None;
1263 case NEWLINE_CR:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001264 return PyString_FromString("\r");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001265 case NEWLINE_LF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001266 return PyString_FromString("\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001267 case NEWLINE_CR|NEWLINE_LF:
1268 return Py_BuildValue("(ss)", "\r", "\n");
1269 case NEWLINE_CRLF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001270 return PyString_FromString("\r\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001271 case NEWLINE_CR|NEWLINE_CRLF:
1272 return Py_BuildValue("(ss)", "\r", "\r\n");
1273 case NEWLINE_LF|NEWLINE_CRLF:
1274 return Py_BuildValue("(ss)", "\n", "\r\n");
1275 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1276 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1277 default:
1278 PyErr_Format(PyExc_SystemError,
1279 "Unknown newlines value 0x%x\n",
1280 self->f_newlinetypes);
1281 return NULL;
1282 }
1283}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001284
1285static PyObject *
1286BZ2File_get_closed(BZ2FileObject *self, void *closure)
1287{
1288 return PyInt_FromLong(self->mode == MODE_CLOSED);
1289}
1290
1291static PyObject *
1292BZ2File_get_mode(BZ2FileObject *self, void *closure)
1293{
1294 return PyObject_GetAttrString(self->file, "mode");
1295}
1296
1297static PyObject *
1298BZ2File_get_name(BZ2FileObject *self, void *closure)
1299{
1300 return PyObject_GetAttrString(self->file, "name");
1301}
1302
1303static PyGetSetDef BZ2File_getset[] = {
1304 {"closed", (getter)BZ2File_get_closed, NULL,
1305 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001306 {"newlines", (getter)BZ2File_get_newlines, NULL,
1307 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001308 {"mode", (getter)BZ2File_get_mode, NULL,
1309 "file mode ('r', 'w', or 'U')"},
1310 {"name", (getter)BZ2File_get_name, NULL,
1311 "file name"},
1312 {NULL} /* Sentinel */
1313};
1314
1315
1316/* ===================================================================== */
1317/* Members of BZ2File_Type. */
1318
1319#undef OFF
1320#define OFF(x) offsetof(BZ2FileObject, x)
1321
1322static PyMemberDef BZ2File_members[] = {
1323 {"softspace", T_INT, OFF(f_softspace), 0,
1324 "flag indicating that a space needs to be printed; used by print"},
1325 {NULL} /* Sentinel */
1326};
1327
1328/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001329/* Slot definitions for BZ2File_Type. */
1330
1331static int
1332BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1333{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001334 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001335 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001336 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001337 char *mode = "r";
1338 int buffering = -1;
1339 int compresslevel = 9;
1340 int bzerror;
1341 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001342
1343 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001344
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001345 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1346 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001347 &compresslevel))
1348 return -1;
1349
1350 if (compresslevel < 1 || compresslevel > 9) {
1351 PyErr_SetString(PyExc_ValueError,
1352 "compresslevel must be between 1 and 9");
1353 return -1;
1354 }
1355
1356 for (;;) {
1357 int error = 0;
1358 switch (*mode) {
1359 case 'r':
1360 case 'w':
1361 if (mode_char)
1362 error = 1;
1363 mode_char = *mode;
1364 break;
1365
1366 case 'b':
1367 break;
1368
1369 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001370#ifdef __VMS
1371 self->f_univ_newline = 0;
1372#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001373 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001374#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001375 break;
1376
1377 default:
1378 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001379 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001380 }
1381 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001382 PyErr_Format(PyExc_ValueError,
1383 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001384 return -1;
1385 }
1386 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001387 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001388 break;
1389 }
1390
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001391 if (mode_char == 0) {
1392 mode_char = 'r';
1393 }
1394
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001395 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001396
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001397 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1398 name, mode, buffering);
1399 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001400 return -1;
1401
1402 /* From now on, we have stuff to dealloc, so jump to error label
1403 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001404
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001405#ifdef WITH_THREAD
1406 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001407 if (!self->lock) {
1408 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001409 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001410 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001411#endif
1412
1413 if (mode_char == 'r')
1414 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001415 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001416 0, 0, NULL, 0);
1417 else
1418 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001419 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001420 compresslevel, 0, 0);
1421
1422 if (bzerror != BZ_OK) {
1423 Util_CatchBZ2Error(bzerror);
1424 goto error;
1425 }
Gregory P. Smith73bee442008-04-12 20:37:48 +00001426 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001427
1428 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1429
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001430 return 0;
1431
1432error:
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001433 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001434#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001435 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001436 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001437 self->lock = NULL;
1438 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001439#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001440 return -1;
1441}
1442
1443static void
1444BZ2File_dealloc(BZ2FileObject *self)
1445{
1446 int bzerror;
1447#ifdef WITH_THREAD
1448 if (self->lock)
1449 PyThread_free_lock(self->lock);
1450#endif
1451 switch (self->mode) {
1452 case MODE_READ:
1453 case MODE_READ_EOF:
1454 BZ2_bzReadClose(&bzerror, self->fp);
1455 break;
1456 case MODE_WRITE:
1457 BZ2_bzWriteClose(&bzerror, self->fp,
1458 0, NULL, NULL);
1459 break;
1460 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001461 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001462 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001463 self->fp = NULL;
1464 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001465 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001466 Py_XDECREF(self->file);
Christian Heimese93237d2007-12-19 02:37:44 +00001467 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001468}
1469
1470/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1471static PyObject *
1472BZ2File_getiter(BZ2FileObject *self)
1473{
1474 if (self->mode == MODE_CLOSED) {
1475 PyErr_SetString(PyExc_ValueError,
1476 "I/O operation on closed file");
1477 return NULL;
1478 }
1479 Py_INCREF((PyObject*)self);
1480 return (PyObject *)self;
1481}
1482
1483/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1484#define READAHEAD_BUFSIZE 8192
1485static PyObject *
1486BZ2File_iternext(BZ2FileObject *self)
1487{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001488 PyStringObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001489 ACQUIRE_LOCK(self);
1490 if (self->mode == MODE_CLOSED) {
Gregory P. Smith3b1e6b22008-07-07 04:31:58 +00001491 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001492 PyErr_SetString(PyExc_ValueError,
1493 "I/O operation on closed file");
1494 return NULL;
1495 }
1496 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1497 RELEASE_LOCK(self);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001498 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001499 Py_XDECREF(ret);
1500 return NULL;
1501 }
1502 return (PyObject *)ret;
1503}
1504
1505/* ===================================================================== */
1506/* BZ2File_Type definition. */
1507
1508PyDoc_VAR(BZ2File__doc__) =
1509PyDoc_STR(
1510"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1511\n\
1512Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1513writing. When opened for writing, the file will be created if it doesn't\n\
1514exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1515unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1516is given, must be a number between 1 and 9.\n\
1517")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001518PyDoc_STR(
1519"\n\
1520Add a 'U' to mode to open the file for input with universal newline\n\
1521support. Any line ending in the input file will be seen as a '\\n' in\n\
1522Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1523for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1524'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1525newlines are available only when reading.\n\
1526")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001527;
1528
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001529static PyTypeObject BZ2File_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001530 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001531 "bz2.BZ2File", /*tp_name*/
1532 sizeof(BZ2FileObject), /*tp_basicsize*/
1533 0, /*tp_itemsize*/
1534 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1535 0, /*tp_print*/
1536 0, /*tp_getattr*/
1537 0, /*tp_setattr*/
1538 0, /*tp_compare*/
1539 0, /*tp_repr*/
1540 0, /*tp_as_number*/
1541 0, /*tp_as_sequence*/
1542 0, /*tp_as_mapping*/
1543 0, /*tp_hash*/
1544 0, /*tp_call*/
1545 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001546 PyObject_GenericGetAttr,/*tp_getattro*/
1547 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001548 0, /*tp_as_buffer*/
1549 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1550 BZ2File__doc__, /*tp_doc*/
1551 0, /*tp_traverse*/
1552 0, /*tp_clear*/
1553 0, /*tp_richcompare*/
1554 0, /*tp_weaklistoffset*/
1555 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1556 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1557 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001558 BZ2File_members, /*tp_members*/
1559 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001560 0, /*tp_base*/
1561 0, /*tp_dict*/
1562 0, /*tp_descr_get*/
1563 0, /*tp_descr_set*/
1564 0, /*tp_dictoffset*/
1565 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001566 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001567 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001568 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001569 0, /*tp_is_gc*/
1570};
1571
1572
1573/* ===================================================================== */
1574/* Methods of BZ2Comp. */
1575
1576PyDoc_STRVAR(BZ2Comp_compress__doc__,
1577"compress(data) -> string\n\
1578\n\
1579Provide more data to the compressor object. It will return chunks of\n\
1580compressed data whenever possible. When you've finished providing data\n\
1581to compress, call the flush() method to finish the compression process,\n\
1582and return what is left in the internal buffers.\n\
1583");
1584
1585static PyObject *
1586BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1587{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001588 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001589 char *data;
1590 int datasize;
1591 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001592 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001593 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001594 bz_stream *bzs = &self->bzs;
1595 int bzerror;
1596
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001597 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001598 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001599 data = pdata.buf;
1600 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001601
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001602 if (datasize == 0) {
1603 PyBuffer_Release(&pdata);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001604 return PyString_FromString("");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001605 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001606
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001607 ACQUIRE_LOCK(self);
1608 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001609 PyErr_SetString(PyExc_ValueError,
1610 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001611 goto error;
1612 }
1613
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001614 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001615 if (!ret)
1616 goto error;
1617
1618 bzs->next_in = data;
1619 bzs->avail_in = datasize;
1620 bzs->next_out = BUF(ret);
1621 bzs->avail_out = bufsize;
1622
1623 totalout = BZS_TOTAL_OUT(bzs);
1624
1625 for (;;) {
1626 Py_BEGIN_ALLOW_THREADS
1627 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1628 Py_END_ALLOW_THREADS
1629 if (bzerror != BZ_RUN_OK) {
1630 Util_CatchBZ2Error(bzerror);
1631 goto error;
1632 }
Georg Brandla47337f2007-03-13 12:34:25 +00001633 if (bzs->avail_in == 0)
1634 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001635 if (bzs->avail_out == 0) {
1636 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001637 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001638 BZ2_bzCompressEnd(bzs);
1639 goto error;
1640 }
1641 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1642 - totalout);
1643 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001644 }
1645 }
1646
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001647 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001648
1649 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001650 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001651 return ret;
1652
1653error:
1654 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001655 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001656 Py_XDECREF(ret);
1657 return NULL;
1658}
1659
1660PyDoc_STRVAR(BZ2Comp_flush__doc__,
1661"flush() -> string\n\
1662\n\
1663Finish the compression process and return what is left in internal buffers.\n\
1664You must not use the compressor object after calling this method.\n\
1665");
1666
1667static PyObject *
1668BZ2Comp_flush(BZ2CompObject *self)
1669{
1670 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001671 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001672 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001673 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001674 int bzerror;
1675
1676 ACQUIRE_LOCK(self);
1677 if (!self->running) {
1678 PyErr_SetString(PyExc_ValueError, "object was already "
1679 "flushed");
1680 goto error;
1681 }
1682 self->running = 0;
1683
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001684 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001685 if (!ret)
1686 goto error;
1687
1688 bzs->next_out = BUF(ret);
1689 bzs->avail_out = bufsize;
1690
1691 totalout = BZS_TOTAL_OUT(bzs);
1692
1693 for (;;) {
1694 Py_BEGIN_ALLOW_THREADS
1695 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1696 Py_END_ALLOW_THREADS
1697 if (bzerror == BZ_STREAM_END) {
1698 break;
1699 } else if (bzerror != BZ_FINISH_OK) {
1700 Util_CatchBZ2Error(bzerror);
1701 goto error;
1702 }
1703 if (bzs->avail_out == 0) {
1704 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001705 if (_PyString_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001706 goto error;
1707 bzs->next_out = BUF(ret);
1708 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1709 - totalout);
1710 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1711 }
1712 }
1713
1714 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001715 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001716
1717 RELEASE_LOCK(self);
1718 return ret;
1719
1720error:
1721 RELEASE_LOCK(self);
1722 Py_XDECREF(ret);
1723 return NULL;
1724}
1725
1726static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001727 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1728 BZ2Comp_compress__doc__},
1729 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1730 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001731 {NULL, NULL} /* sentinel */
1732};
1733
1734
1735/* ===================================================================== */
1736/* Slot definitions for BZ2Comp_Type. */
1737
1738static int
1739BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1740{
1741 int compresslevel = 9;
1742 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001743 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001744
1745 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1746 kwlist, &compresslevel))
1747 return -1;
1748
1749 if (compresslevel < 1 || compresslevel > 9) {
1750 PyErr_SetString(PyExc_ValueError,
1751 "compresslevel must be between 1 and 9");
1752 goto error;
1753 }
1754
1755#ifdef WITH_THREAD
1756 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001757 if (!self->lock) {
1758 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001759 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001760 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001761#endif
1762
1763 memset(&self->bzs, 0, sizeof(bz_stream));
1764 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1765 if (bzerror != BZ_OK) {
1766 Util_CatchBZ2Error(bzerror);
1767 goto error;
1768 }
1769
1770 self->running = 1;
1771
1772 return 0;
1773error:
1774#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001775 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001776 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001777 self->lock = NULL;
1778 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001779#endif
1780 return -1;
1781}
1782
1783static void
1784BZ2Comp_dealloc(BZ2CompObject *self)
1785{
1786#ifdef WITH_THREAD
1787 if (self->lock)
1788 PyThread_free_lock(self->lock);
1789#endif
1790 BZ2_bzCompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001791 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001792}
1793
1794
1795/* ===================================================================== */
1796/* BZ2Comp_Type definition. */
1797
1798PyDoc_STRVAR(BZ2Comp__doc__,
1799"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1800\n\
1801Create a new compressor object. This object may be used to compress\n\
1802data sequentially. If you want to compress data in one shot, use the\n\
1803compress() function instead. The compresslevel parameter, if given,\n\
1804must be a number between 1 and 9.\n\
1805");
1806
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001807static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001808 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001809 "bz2.BZ2Compressor", /*tp_name*/
1810 sizeof(BZ2CompObject), /*tp_basicsize*/
1811 0, /*tp_itemsize*/
1812 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1813 0, /*tp_print*/
1814 0, /*tp_getattr*/
1815 0, /*tp_setattr*/
1816 0, /*tp_compare*/
1817 0, /*tp_repr*/
1818 0, /*tp_as_number*/
1819 0, /*tp_as_sequence*/
1820 0, /*tp_as_mapping*/
1821 0, /*tp_hash*/
1822 0, /*tp_call*/
1823 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001824 PyObject_GenericGetAttr,/*tp_getattro*/
1825 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001826 0, /*tp_as_buffer*/
1827 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1828 BZ2Comp__doc__, /*tp_doc*/
1829 0, /*tp_traverse*/
1830 0, /*tp_clear*/
1831 0, /*tp_richcompare*/
1832 0, /*tp_weaklistoffset*/
1833 0, /*tp_iter*/
1834 0, /*tp_iternext*/
1835 BZ2Comp_methods, /*tp_methods*/
1836 0, /*tp_members*/
1837 0, /*tp_getset*/
1838 0, /*tp_base*/
1839 0, /*tp_dict*/
1840 0, /*tp_descr_get*/
1841 0, /*tp_descr_set*/
1842 0, /*tp_dictoffset*/
1843 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001844 PyType_GenericAlloc, /*tp_alloc*/
1845 PyType_GenericNew, /*tp_new*/
1846 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001847 0, /*tp_is_gc*/
1848};
1849
1850
1851/* ===================================================================== */
1852/* Members of BZ2Decomp. */
1853
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001854#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001855#define OFF(x) offsetof(BZ2DecompObject, x)
1856
1857static PyMemberDef BZ2Decomp_members[] = {
1858 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1859 {NULL} /* Sentinel */
1860};
1861
1862
1863/* ===================================================================== */
1864/* Methods of BZ2Decomp. */
1865
1866PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1867"decompress(data) -> string\n\
1868\n\
1869Provide more data to the decompressor object. It will return chunks\n\
1870of decompressed data whenever possible. If you try to decompress data\n\
1871after the end of stream is found, EOFError will be raised. If any data\n\
1872was found after the end of stream, it'll be ignored and saved in\n\
1873unused_data attribute.\n\
1874");
1875
1876static PyObject *
1877BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1878{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001879 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001880 char *data;
1881 int datasize;
1882 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001883 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001884 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001885 bz_stream *bzs = &self->bzs;
1886 int bzerror;
1887
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001888 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001889 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001890 data = pdata.buf;
1891 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001892
1893 ACQUIRE_LOCK(self);
1894 if (!self->running) {
1895 PyErr_SetString(PyExc_EOFError, "end of stream was "
1896 "already found");
1897 goto error;
1898 }
1899
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001900 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001901 if (!ret)
1902 goto error;
1903
1904 bzs->next_in = data;
1905 bzs->avail_in = datasize;
1906 bzs->next_out = BUF(ret);
1907 bzs->avail_out = bufsize;
1908
1909 totalout = BZS_TOTAL_OUT(bzs);
1910
1911 for (;;) {
1912 Py_BEGIN_ALLOW_THREADS
1913 bzerror = BZ2_bzDecompress(bzs);
1914 Py_END_ALLOW_THREADS
1915 if (bzerror == BZ_STREAM_END) {
1916 if (bzs->avail_in != 0) {
1917 Py_DECREF(self->unused_data);
1918 self->unused_data =
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001919 PyString_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001920 bzs->avail_in);
1921 }
1922 self->running = 0;
1923 break;
1924 }
1925 if (bzerror != BZ_OK) {
1926 Util_CatchBZ2Error(bzerror);
1927 goto error;
1928 }
Georg Brandla47337f2007-03-13 12:34:25 +00001929 if (bzs->avail_in == 0)
1930 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001931 if (bzs->avail_out == 0) {
1932 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001933 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001934 BZ2_bzDecompressEnd(bzs);
1935 goto error;
1936 }
1937 bzs->next_out = BUF(ret);
1938 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1939 - totalout);
1940 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001941 }
1942 }
1943
1944 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001945 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001946
1947 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001948 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001949 return ret;
1950
1951error:
1952 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001953 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001954 Py_XDECREF(ret);
1955 return NULL;
1956}
1957
1958static PyMethodDef BZ2Decomp_methods[] = {
1959 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1960 {NULL, NULL} /* sentinel */
1961};
1962
1963
1964/* ===================================================================== */
1965/* Slot definitions for BZ2Decomp_Type. */
1966
1967static int
1968BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1969{
1970 int bzerror;
1971
1972 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1973 return -1;
1974
1975#ifdef WITH_THREAD
1976 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001977 if (!self->lock) {
1978 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001979 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001980 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001981#endif
1982
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001983 self->unused_data = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001984 if (!self->unused_data)
1985 goto error;
1986
1987 memset(&self->bzs, 0, sizeof(bz_stream));
1988 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1989 if (bzerror != BZ_OK) {
1990 Util_CatchBZ2Error(bzerror);
1991 goto error;
1992 }
1993
1994 self->running = 1;
1995
1996 return 0;
1997
1998error:
1999#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00002000 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002001 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00002002 self->lock = NULL;
2003 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002004#endif
Neal Norwitzb59d08c2006-07-22 16:20:49 +00002005 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002006 return -1;
2007}
2008
2009static void
2010BZ2Decomp_dealloc(BZ2DecompObject *self)
2011{
2012#ifdef WITH_THREAD
2013 if (self->lock)
2014 PyThread_free_lock(self->lock);
2015#endif
2016 Py_XDECREF(self->unused_data);
2017 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00002018 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002019}
2020
2021
2022/* ===================================================================== */
2023/* BZ2Decomp_Type definition. */
2024
2025PyDoc_STRVAR(BZ2Decomp__doc__,
2026"BZ2Decompressor() -> decompressor object\n\
2027\n\
2028Create a new decompressor object. This object may be used to decompress\n\
2029data sequentially. If you want to decompress data in one shot, use the\n\
2030decompress() function instead.\n\
2031");
2032
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00002033static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00002034 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002035 "bz2.BZ2Decompressor", /*tp_name*/
2036 sizeof(BZ2DecompObject), /*tp_basicsize*/
2037 0, /*tp_itemsize*/
2038 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2039 0, /*tp_print*/
2040 0, /*tp_getattr*/
2041 0, /*tp_setattr*/
2042 0, /*tp_compare*/
2043 0, /*tp_repr*/
2044 0, /*tp_as_number*/
2045 0, /*tp_as_sequence*/
2046 0, /*tp_as_mapping*/
2047 0, /*tp_hash*/
2048 0, /*tp_call*/
2049 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002050 PyObject_GenericGetAttr,/*tp_getattro*/
2051 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002052 0, /*tp_as_buffer*/
2053 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2054 BZ2Decomp__doc__, /*tp_doc*/
2055 0, /*tp_traverse*/
2056 0, /*tp_clear*/
2057 0, /*tp_richcompare*/
2058 0, /*tp_weaklistoffset*/
2059 0, /*tp_iter*/
2060 0, /*tp_iternext*/
2061 BZ2Decomp_methods, /*tp_methods*/
2062 BZ2Decomp_members, /*tp_members*/
2063 0, /*tp_getset*/
2064 0, /*tp_base*/
2065 0, /*tp_dict*/
2066 0, /*tp_descr_get*/
2067 0, /*tp_descr_set*/
2068 0, /*tp_dictoffset*/
2069 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002070 PyType_GenericAlloc, /*tp_alloc*/
2071 PyType_GenericNew, /*tp_new*/
2072 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002073 0, /*tp_is_gc*/
2074};
2075
2076
2077/* ===================================================================== */
2078/* Module functions. */
2079
2080PyDoc_STRVAR(bz2_compress__doc__,
2081"compress(data [, compresslevel=9]) -> string\n\
2082\n\
2083Compress data in one shot. If you want to compress data sequentially,\n\
2084use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2085given, must be a number between 1 and 9.\n\
2086");
2087
2088static PyObject *
2089bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2090{
2091 int compresslevel=9;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002092 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002093 char *data;
2094 int datasize;
2095 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002096 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002097 bz_stream _bzs;
2098 bz_stream *bzs = &_bzs;
2099 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002100 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002101
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002102 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2103 kwlist, &pdata,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002104 &compresslevel))
2105 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002106 data = pdata.buf;
2107 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002108
2109 if (compresslevel < 1 || compresslevel > 9) {
2110 PyErr_SetString(PyExc_ValueError,
2111 "compresslevel must be between 1 and 9");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002112 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002113 return NULL;
2114 }
2115
2116 /* Conforming to bz2 manual, this is large enough to fit compressed
2117 * data in one shot. We will check it later anyway. */
2118 bufsize = datasize + (datasize/100+1) + 600;
2119
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002120 ret = PyString_FromStringAndSize(NULL, bufsize);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002121 if (!ret) {
2122 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002123 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002124 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002125
2126 memset(bzs, 0, sizeof(bz_stream));
2127
2128 bzs->next_in = data;
2129 bzs->avail_in = datasize;
2130 bzs->next_out = BUF(ret);
2131 bzs->avail_out = bufsize;
2132
2133 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2134 if (bzerror != BZ_OK) {
2135 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002136 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002137 Py_DECREF(ret);
2138 return NULL;
2139 }
Tim Peterse3228092002-11-09 04:21:44 +00002140
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002141 for (;;) {
2142 Py_BEGIN_ALLOW_THREADS
2143 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2144 Py_END_ALLOW_THREADS
2145 if (bzerror == BZ_STREAM_END) {
2146 break;
2147 } else if (bzerror != BZ_FINISH_OK) {
2148 BZ2_bzCompressEnd(bzs);
2149 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002150 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002151 Py_DECREF(ret);
2152 return NULL;
2153 }
2154 if (bzs->avail_out == 0) {
2155 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002156 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002157 BZ2_bzCompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002158 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002159 Py_DECREF(ret);
2160 return NULL;
2161 }
2162 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2163 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2164 }
2165 }
2166
2167 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002168 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002169 BZ2_bzCompressEnd(bzs);
2170
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002171 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002172 return ret;
2173}
2174
2175PyDoc_STRVAR(bz2_decompress__doc__,
2176"decompress(data) -> decompressed data\n\
2177\n\
2178Decompress data in one shot. If you want to decompress data sequentially,\n\
2179use an instance of BZ2Decompressor instead.\n\
2180");
2181
2182static PyObject *
2183bz2_decompress(PyObject *self, PyObject *args)
2184{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002185 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002186 char *data;
2187 int datasize;
2188 int bufsize = SMALLCHUNK;
2189 PyObject *ret;
2190 bz_stream _bzs;
2191 bz_stream *bzs = &_bzs;
2192 int bzerror;
2193
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002194 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002195 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002196 data = pdata.buf;
2197 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002198
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002199 if (datasize == 0) {
2200 PyBuffer_Release(&pdata);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002201 return PyString_FromString("");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002202 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002203
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002204 ret = PyString_FromStringAndSize(NULL, bufsize);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002205 if (!ret) {
2206 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002207 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002208 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002209
2210 memset(bzs, 0, sizeof(bz_stream));
2211
2212 bzs->next_in = data;
2213 bzs->avail_in = datasize;
2214 bzs->next_out = BUF(ret);
2215 bzs->avail_out = bufsize;
2216
2217 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2218 if (bzerror != BZ_OK) {
2219 Util_CatchBZ2Error(bzerror);
2220 Py_DECREF(ret);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002221 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002222 return NULL;
2223 }
Tim Peterse3228092002-11-09 04:21:44 +00002224
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002225 for (;;) {
2226 Py_BEGIN_ALLOW_THREADS
2227 bzerror = BZ2_bzDecompress(bzs);
2228 Py_END_ALLOW_THREADS
2229 if (bzerror == BZ_STREAM_END) {
2230 break;
2231 } else if (bzerror != BZ_OK) {
2232 BZ2_bzDecompressEnd(bzs);
2233 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002234 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002235 Py_DECREF(ret);
2236 return NULL;
2237 }
Georg Brandla47337f2007-03-13 12:34:25 +00002238 if (bzs->avail_in == 0) {
2239 BZ2_bzDecompressEnd(bzs);
2240 PyErr_SetString(PyExc_ValueError,
2241 "couldn't find end of stream");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002242 PyBuffer_Release(&pdata);
Georg Brandla47337f2007-03-13 12:34:25 +00002243 Py_DECREF(ret);
2244 return NULL;
2245 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002246 if (bzs->avail_out == 0) {
2247 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002248 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002249 BZ2_bzDecompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002250 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002251 Py_DECREF(ret);
2252 return NULL;
2253 }
2254 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2255 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002256 }
2257 }
2258
2259 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002260 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002261 BZ2_bzDecompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002262 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002263
2264 return ret;
2265}
2266
2267static PyMethodDef bz2_methods[] = {
2268 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2269 bz2_compress__doc__},
2270 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2271 bz2_decompress__doc__},
2272 {NULL, NULL} /* sentinel */
2273};
2274
2275/* ===================================================================== */
2276/* Initialization function. */
2277
2278PyDoc_STRVAR(bz2__doc__,
2279"The python bz2 module provides a comprehensive interface for\n\
2280the bz2 compression library. It implements a complete file\n\
2281interface, one shot (de)compression functions, and types for\n\
2282sequential (de)compression.\n\
2283");
2284
Neal Norwitz21d896c2003-07-01 20:15:21 +00002285PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002286initbz2(void)
2287{
2288 PyObject *m;
2289
Christian Heimese93237d2007-12-19 02:37:44 +00002290 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2291 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2292 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002293
2294 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002295 if (m == NULL)
2296 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002297
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002298 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002299
2300 Py_INCREF(&BZ2File_Type);
2301 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2302
2303 Py_INCREF(&BZ2Comp_Type);
2304 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2305
2306 Py_INCREF(&BZ2Decomp_Type);
2307 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2308}