blob: d4f27436fe0b0863a07193cd2fdc60cc7242dcd1 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
Georg Brandl33a5f2a2005-08-21 14:16:04 +000025/* Our very own off_t-like type, 64-bit if possible */
26/* copied from Objects/fileobject.c */
#if !defined(HAVE_LARGEFILE_SUPPORT)
/* No large-file support configured: use off_t whatever its width. */
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
/* Large-file support and off_t is already at least 64 bits wide. */
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
/* off_t is too narrow, but fpos_t is at least 64 bits wide. */
typedef fpos_t Py_off_t;
#else
/* Large-file support was requested but no suitable type exists. */
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Shorthand for the raw character buffer of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0   /* reported as "I/O operation on closed file" */
#define MODE_READ     1   /* readable, more data may follow */
#define MODE_READ_EOF 2   /* set by the readers once BZ_STREAM_END is seen */
#define MODE_WRITE    3   /* required by write() and writelines() */

/* Exact type check against BZ2File_Type (compares ob_type directly). */
#define BZ2FileObject_Check(v)	((v)->ob_type == &BZ2File_Type)
45
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/*
 * BZS_TOTAL_OUT(bzs) yields the total number of output bytes recorded in a
 * bz_stream pointer.  When the bzlib header defines BZ_CONFIG_ERROR the
 * counter is split into total_out_hi32/total_out_lo32 and is recombined into
 * the widest integer type available; otherwise the header exposes a single
 * total_out field and only the un-prefixed function names, so the BZ2_*
 * names used throughout this file are mapped onto them.
 *
 * The macro argument and the whole expansion are parenthesized so that any
 * pointer expression (e.g. &stream) can be passed safely.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type: only the low 32 bits can be reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/*
 * Per-object locking helpers.  With thread support they take/release the
 * object's `lock` member (the acquire call passes 1 as its wait flag);
 * without thread support they expand to nothing.
 */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Bits in f_newlinetypes.  The readers OR these together as they meet each
 * newline convention, so the field is a bit mask, not a single value. */
#define NEWLINE_UNKNOWN	0	/* No newline seen, yet */
#define NEWLINE_CR 1		/* \r newline seen */
#define NEWLINE_LF 2		/* \n newline seen */
#define NEWLINE_CRLF 4		/* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
/* State of a BZ2File object: the bzlib file handle plus read-ahead and
 * universal-newline bookkeeping. */
typedef struct {
    PyObject_HEAD
    PyObject *file;     /* NOTE(review): presumably the underlying Python
                           file object; not used in the code visible here */

    char* f_buf;        /* Allocated readahead buffer */
    char* f_bufend;     /* Points after last occupied position */
    char* f_bufptr;     /* Current buffer position */

    int f_univ_newline; /* Handle any newline convention */
    int f_newlinetypes; /* Types of newlines seen (NEWLINE_* bit mask) */
    int f_skipnextlf;   /* Skip next \n */

    BZFILE *fp;         /* handle passed to BZ2_bzRead / BZ2_bzWrite */
    int mode;           /* one of the MODE_* constants */
    Py_off_t pos;       /* advanced by the read and write routines */
    Py_off_t size;      /* set to pos when BZ_STREAM_END is reached */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* taken via ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2FileObject;
117
/* State of a compressor object wrapping a bzlib stream. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;      /* bzlib stream state */
    int running;        /* NOTE(review): presumably non-zero while the
                           stream is active; confirm against its users */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* taken via ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2CompObject;
126
/* State of a decompressor object wrapping a bzlib stream. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;          /* bzlib stream state */
    int running;            /* NOTE(review): presumably non-zero while the
                               stream is active; confirm against its users */
    PyObject *unused_data;  /* NOTE(review): presumably input found after
                               the end of the compressed stream; confirm */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* taken via ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2DecompObject;
136
137/* ===================================================================== */
138/* Utility functions. */
139
140static int
141Util_CatchBZ2Error(int bzerror)
142{
143 int ret = 0;
144 switch(bzerror) {
145 case BZ_OK:
146 case BZ_STREAM_END:
147 break;
148
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000149#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000150 case BZ_CONFIG_ERROR:
151 PyErr_SetString(PyExc_SystemError,
152 "the bz2 library was not compiled "
153 "correctly");
154 ret = 1;
155 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000156#endif
Tim Peterse3228092002-11-09 04:21:44 +0000157
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000158 case BZ_PARAM_ERROR:
159 PyErr_SetString(PyExc_ValueError,
160 "the bz2 library has received wrong "
161 "parameters");
162 ret = 1;
163 break;
Tim Peterse3228092002-11-09 04:21:44 +0000164
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000165 case BZ_MEM_ERROR:
166 PyErr_NoMemory();
167 ret = 1;
168 break;
169
170 case BZ_DATA_ERROR:
171 case BZ_DATA_ERROR_MAGIC:
172 PyErr_SetString(PyExc_IOError, "invalid data stream");
173 ret = 1;
174 break;
175
176 case BZ_IO_ERROR:
177 PyErr_SetString(PyExc_IOError, "unknown IO error");
178 ret = 1;
179 break;
180
181 case BZ_UNEXPECTED_EOF:
182 PyErr_SetString(PyExc_EOFError,
183 "compressed file ended before the "
184 "logical end-of-stream was detected");
185 ret = 1;
186 break;
187
188 case BZ_SEQUENCE_ERROR:
189 PyErr_SetString(PyExc_RuntimeError,
190 "wrong sequence of bz2 library "
191 "commands used");
192 ret = 1;
193 break;
194 }
195 return ret;
196}
197
/* Smallest growth step: stdio's BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* Largest growth step; smaller where int is narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK  (512 * 1024)
#else
#define BIGCHUNK  (512 * 32)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK, up to and including BIGCHUNK: double it;
 *   - above BIGCHUNK: add BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
224
225/* This is a hacked version of Python's fileobject.c:get_line(). */
226static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000227Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000228{
229 char c;
230 char *buf, *end;
231 size_t total_v_size; /* total # of slots in buffer */
232 size_t used_v_size; /* # used slots in buffer */
233 size_t increment; /* amount to increment the buffer */
234 PyObject *v;
235 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000236 int newlinetypes = f->f_newlinetypes;
237 int skipnextlf = f->f_skipnextlf;
238 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000239
240 total_v_size = n > 0 ? n : 100;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000241 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000242 if (v == NULL)
243 return NULL;
244
245 buf = BUF(v);
246 end = buf + total_v_size;
247
248 for (;;) {
249 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000250 if (univ_newline) {
251 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000252 BZ2_bzRead(&bzerror, f->fp, &c, 1);
253 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000254 if (bzerror != BZ_OK || buf == end)
255 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000256 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000257 skipnextlf = 0;
258 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000259 /* Seeing a \n here with
260 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000261 * saw a \r before.
262 */
263 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000264 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000265 &c, 1);
266 if (bzerror != BZ_OK)
267 break;
268 } else {
269 newlinetypes |= NEWLINE_CR;
270 }
271 }
272 if (c == '\r') {
273 skipnextlf = 1;
274 c = '\n';
275 } else if ( c == '\n')
276 newlinetypes |= NEWLINE_LF;
277 *buf++ = c;
278 if (c == '\n') break;
279 }
280 if (bzerror == BZ_STREAM_END && skipnextlf)
281 newlinetypes |= NEWLINE_CR;
282 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000283 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000284 BZ2_bzRead(&bzerror, f->fp, &c, 1);
285 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000286 *buf++ = c;
287 } while (bzerror == BZ_OK && c != '\n' && buf != end);
288 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000289 f->f_newlinetypes = newlinetypes;
290 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000291 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000292 f->size = f->pos;
293 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294 break;
295 } else if (bzerror != BZ_OK) {
296 Util_CatchBZ2Error(bzerror);
297 Py_DECREF(v);
298 return NULL;
299 }
300 if (c == '\n')
301 break;
302 /* Must be because buf == end */
303 if (n > 0)
304 break;
305 used_v_size = total_v_size;
306 increment = total_v_size >> 2; /* mild exponential growth */
307 total_v_size += increment;
308 if (total_v_size > INT_MAX) {
309 PyErr_SetString(PyExc_OverflowError,
310 "line is longer than a Python string can hold");
311 Py_DECREF(v);
312 return NULL;
313 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000314 if (PyBytes_Resize(v, total_v_size) < 0) {
315 Py_DECREF(v);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000316 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000317 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
320 }
321
322 used_v_size = buf - BUF(v);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000323 if (used_v_size != total_v_size) {
324 if (PyBytes_Resize(v, used_v_size) < 0) {
325 Py_DECREF(v);
326 v = NULL;
327 }
328 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000329 return v;
330}
331
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000332/* This is a hacked version of Python's
333 * fileobject.c:Py_UniversalNewlineFread(). */
334size_t
335Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000336 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000337{
338 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000339 int newlinetypes, skipnextlf;
340
341 assert(buf != NULL);
342 assert(stream != NULL);
343
344 if (!f->f_univ_newline)
345 return BZ2_bzRead(bzerror, stream, buf, n);
346
347 newlinetypes = f->f_newlinetypes;
348 skipnextlf = f->f_skipnextlf;
349
350 /* Invariant: n is the number of bytes remaining to be filled
351 * in the buffer.
352 */
353 while (n) {
354 size_t nread;
355 int shortread;
356 char *src = dst;
357
358 nread = BZ2_bzRead(bzerror, stream, dst, n);
359 assert(nread <= n);
360 n -= nread; /* assuming 1 byte out for each in; will adjust */
361 shortread = n != 0; /* true iff EOF or error */
362 while (nread--) {
363 char c = *src++;
364 if (c == '\r') {
365 /* Save as LF and set flag to skip next LF. */
366 *dst++ = '\n';
367 skipnextlf = 1;
368 }
369 else if (skipnextlf && c == '\n') {
370 /* Skip LF, and remember we saw CR LF. */
371 skipnextlf = 0;
372 newlinetypes |= NEWLINE_CRLF;
373 ++n;
374 }
375 else {
376 /* Normal char to be stored in buffer. Also
377 * update the newlinetypes flag if either this
378 * is an LF or the previous char was a CR.
379 */
380 if (c == '\n')
381 newlinetypes |= NEWLINE_LF;
382 else if (skipnextlf)
383 newlinetypes |= NEWLINE_CR;
384 *dst++ = c;
385 skipnextlf = 0;
386 }
387 }
388 if (shortread) {
389 /* If this is EOF, update type flags. */
390 if (skipnextlf && *bzerror == BZ_STREAM_END)
391 newlinetypes |= NEWLINE_CR;
392 break;
393 }
394 }
395 f->f_newlinetypes = newlinetypes;
396 f->f_skipnextlf = skipnextlf;
397 return dst - buf;
398}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399
400/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
401static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000402Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000404 if (f->f_buf != NULL) {
405 PyMem_Free(f->f_buf);
406 f->f_buf = NULL;
407 }
408}
409
410/* This is a hacked version of Python's fileobject.c:readahead(). */
411static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000412Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000413{
414 int chunksize;
415 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000416
417 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000418 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000419 return 0;
420 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000421 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000422 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000424 f->f_bufptr = f->f_buf;
425 f->f_bufend = f->f_buf;
426 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427 }
428 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
429 return -1;
430 }
431 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000432 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
433 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000434 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000435 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000436 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 f->size = f->pos;
438 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000439 } else if (bzerror != BZ_OK) {
440 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000441 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000442 return -1;
443 }
444 f->f_bufptr = f->f_buf;
445 f->f_bufend = f->f_buf + chunksize;
446 return 0;
447}
448
449/* This is a hacked version of Python's
450 * fileobject.c:readahead_get_line_skip(). */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000451static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000452Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000453{
Guido van Rossum522a6c62007-05-22 23:13:45 +0000454 PyBytesObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000455 char *bufptr;
456 char *buf;
457 int len;
458
459 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000460 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return NULL;
462
463 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000464 if (len == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +0000465 return (PyBytesObject *)
466 PyBytes_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 bufptr = memchr(f->f_bufptr, '\n', len);
468 if (bufptr != NULL) {
469 bufptr++; /* Count the '\n' */
470 len = bufptr - f->f_bufptr;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000471 s = (PyBytesObject *)
472 PyBytes_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000473 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000474 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000475 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000476 f->f_bufptr = bufptr;
477 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000478 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000479 } else {
480 bufptr = f->f_bufptr;
481 buf = f->f_buf;
482 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000483 s = Util_ReadAheadGetLineSkip(f, skip+len,
484 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000485 if (s == NULL) {
486 PyMem_Free(buf);
487 return NULL;
488 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000489 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000490 PyMem_Free(buf);
491 }
492 return s;
493}
494
495/* ===================================================================== */
496/* Methods of BZ2File. */
497
498PyDoc_STRVAR(BZ2File_read__doc__,
499"read([size]) -> string\n\
500\n\
501Read at most size uncompressed bytes, returned as a string. If the size\n\
502argument is negative or omitted, read until EOF is reached.\n\
503");
504
505/* This is a hacked version of Python's fileobject.c:file_read(). */
506static PyObject *
507BZ2File_read(BZ2FileObject *self, PyObject *args)
508{
509 long bytesrequested = -1;
510 size_t bytesread, buffersize, chunksize;
511 int bzerror;
512 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000513
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000514 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
515 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000516
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000517 ACQUIRE_LOCK(self);
518 switch (self->mode) {
519 case MODE_READ:
520 break;
521 case MODE_READ_EOF:
Guido van Rossum522a6c62007-05-22 23:13:45 +0000522 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000523 goto cleanup;
524 case MODE_CLOSED:
525 PyErr_SetString(PyExc_ValueError,
526 "I/O operation on closed file");
527 goto cleanup;
528 default:
529 PyErr_SetString(PyExc_IOError,
530 "file is not ready for reading");
531 goto cleanup;
532 }
533
534 if (bytesrequested < 0)
535 buffersize = Util_NewBufferSize((size_t)0);
536 else
537 buffersize = bytesrequested;
538 if (buffersize > INT_MAX) {
539 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000540 "requested number of bytes is "
541 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000542 goto cleanup;
543 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000544 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000545 if (ret == NULL)
546 goto cleanup;
547 bytesread = 0;
548
549 for (;;) {
550 Py_BEGIN_ALLOW_THREADS
551 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
552 BUF(ret)+bytesread,
553 buffersize-bytesread,
554 self);
555 self->pos += chunksize;
556 Py_END_ALLOW_THREADS
557 bytesread += chunksize;
558 if (bzerror == BZ_STREAM_END) {
559 self->size = self->pos;
560 self->mode = MODE_READ_EOF;
561 break;
562 } else if (bzerror != BZ_OK) {
563 Util_CatchBZ2Error(bzerror);
564 Py_DECREF(ret);
565 ret = NULL;
566 goto cleanup;
567 }
568 if (bytesrequested < 0) {
569 buffersize = Util_NewBufferSize(buffersize);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000570 if (PyBytes_Resize(ret, buffersize) < 0) {
571 Py_DECREF(ret);
572 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000573 goto cleanup;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000574 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000575 } else {
576 break;
577 }
578 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000579 if (bytesread != buffersize) {
580 if (PyBytes_Resize(ret, bytesread) < 0) {
581 Py_DECREF(ret);
582 ret = NULL;
583 }
584 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000585
586cleanup:
587 RELEASE_LOCK(self);
588 return ret;
589}
590
591PyDoc_STRVAR(BZ2File_readline__doc__,
592"readline([size]) -> string\n\
593\n\
594Return the next line from the file, as a string, retaining newline.\n\
595A non-negative size argument will limit the maximum number of bytes to\n\
596return (an incomplete line may be returned then). Return an empty\n\
597string at EOF.\n\
598");
599
600static PyObject *
601BZ2File_readline(BZ2FileObject *self, PyObject *args)
602{
603 PyObject *ret = NULL;
604 int sizehint = -1;
605
606 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
607 return NULL;
608
609 ACQUIRE_LOCK(self);
610 switch (self->mode) {
611 case MODE_READ:
612 break;
613 case MODE_READ_EOF:
Guido van Rossum522a6c62007-05-22 23:13:45 +0000614 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000615 goto cleanup;
616 case MODE_CLOSED:
617 PyErr_SetString(PyExc_ValueError,
618 "I/O operation on closed file");
619 goto cleanup;
620 default:
621 PyErr_SetString(PyExc_IOError,
622 "file is not ready for reading");
623 goto cleanup;
624 }
625
626 if (sizehint == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +0000627 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000628 else
629 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
630
631cleanup:
632 RELEASE_LOCK(self);
633 return ret;
634}
635
636PyDoc_STRVAR(BZ2File_readlines__doc__,
637"readlines([size]) -> list\n\
638\n\
639Call readline() repeatedly and return a list of lines read.\n\
640The optional size argument, if given, is an approximate bound on the\n\
641total number of bytes in the lines returned.\n\
642");
643
644/* This is a hacked version of Python's fileobject.c:file_readlines(). */
645static PyObject *
646BZ2File_readlines(BZ2FileObject *self, PyObject *args)
647{
648 long sizehint = 0;
649 PyObject *list = NULL;
650 PyObject *line;
651 char small_buffer[SMALLCHUNK];
652 char *buffer = small_buffer;
653 size_t buffersize = SMALLCHUNK;
654 PyObject *big_buffer = NULL;
655 size_t nfilled = 0;
656 size_t nread;
657 size_t totalread = 0;
658 char *p, *q, *end;
659 int err;
660 int shortread = 0;
661 int bzerror;
662
663 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
664 return NULL;
665
666 ACQUIRE_LOCK(self);
667 switch (self->mode) {
668 case MODE_READ:
669 break;
670 case MODE_READ_EOF:
671 list = PyList_New(0);
672 goto cleanup;
673 case MODE_CLOSED:
674 PyErr_SetString(PyExc_ValueError,
675 "I/O operation on closed file");
676 goto cleanup;
677 default:
678 PyErr_SetString(PyExc_IOError,
679 "file is not ready for reading");
680 goto cleanup;
681 }
682
683 if ((list = PyList_New(0)) == NULL)
684 goto cleanup;
685
686 for (;;) {
687 Py_BEGIN_ALLOW_THREADS
688 nread = Util_UnivNewlineRead(&bzerror, self->fp,
689 buffer+nfilled,
690 buffersize-nfilled, self);
691 self->pos += nread;
692 Py_END_ALLOW_THREADS
693 if (bzerror == BZ_STREAM_END) {
694 self->size = self->pos;
695 self->mode = MODE_READ_EOF;
696 if (nread == 0) {
697 sizehint = 0;
698 break;
699 }
700 shortread = 1;
701 } else if (bzerror != BZ_OK) {
702 Util_CatchBZ2Error(bzerror);
703 error:
704 Py_DECREF(list);
705 list = NULL;
706 goto cleanup;
707 }
708 totalread += nread;
709 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000710 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000711 /* Need a larger buffer to fit this line */
712 nfilled += nread;
713 buffersize *= 2;
714 if (buffersize > INT_MAX) {
715 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000716 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000717 goto error;
718 }
719 if (big_buffer == NULL) {
720 /* Create the big buffer */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000721 big_buffer = PyBytes_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000722 NULL, buffersize);
723 if (big_buffer == NULL)
724 goto error;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000725 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000726 memcpy(buffer, small_buffer, nfilled);
727 }
728 else {
729 /* Grow the big buffer */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000730 if (PyBytes_Resize(big_buffer, buffersize) < 0){
731 Py_DECREF(big_buffer);
732 big_buffer = NULL;
733 goto error;
734 }
735 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000736 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000737 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000738 }
739 end = buffer+nfilled+nread;
740 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000741 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000742 /* Process complete lines */
743 p++;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000744 line = PyBytes_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000745 if (line == NULL)
746 goto error;
747 err = PyList_Append(list, line);
748 Py_DECREF(line);
749 if (err != 0)
750 goto error;
751 q = p;
752 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000753 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000754 /* Move the remaining incomplete line to the start */
755 nfilled = end-q;
756 memmove(buffer, q, nfilled);
757 if (sizehint > 0)
758 if (totalread >= (size_t)sizehint)
759 break;
760 if (shortread) {
761 sizehint = 0;
762 break;
763 }
764 }
765 if (nfilled != 0) {
766 /* Partial last line */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000767 line = PyBytes_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000768 if (line == NULL)
769 goto error;
770 if (sizehint > 0) {
771 /* Need to complete the last line */
772 PyObject *rest = Util_GetLine(self, 0);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000773 PyObject *new;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000774 if (rest == NULL) {
775 Py_DECREF(line);
776 goto error;
777 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000778 new = PyBytes_Concat(line, rest);
779 Py_DECREF(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000780 Py_DECREF(rest);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000781 line = new;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000782 if (line == NULL)
783 goto error;
784 }
785 err = PyList_Append(list, line);
786 Py_DECREF(line);
787 if (err != 0)
788 goto error;
789 }
790
791 cleanup:
792 RELEASE_LOCK(self);
793 if (big_buffer) {
794 Py_DECREF(big_buffer);
795 }
796 return list;
797}
798
799PyDoc_STRVAR(BZ2File_write__doc__,
800"write(data) -> None\n\
801\n\
802Write the 'data' string to file. Note that due to buffering, close() may\n\
803be needed before the file on disk reflects the data written.\n\
804");
805
806/* This is a hacked version of Python's fileobject.c:file_write(). */
807static PyObject *
808BZ2File_write(BZ2FileObject *self, PyObject *args)
809{
810 PyObject *ret = NULL;
811 char *buf;
812 int len;
813 int bzerror;
814
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000815 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000817
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000818 ACQUIRE_LOCK(self);
819 switch (self->mode) {
820 case MODE_WRITE:
821 break;
Tim Peterse3228092002-11-09 04:21:44 +0000822
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000823 case MODE_CLOSED:
824 PyErr_SetString(PyExc_ValueError,
825 "I/O operation on closed file");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000826 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000827
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000828 default:
829 PyErr_SetString(PyExc_IOError,
830 "file is not ready for writing");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000831 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000832 }
833
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000834 Py_BEGIN_ALLOW_THREADS
835 BZ2_bzWrite (&bzerror, self->fp, buf, len);
836 self->pos += len;
837 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000838
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000839 if (bzerror != BZ_OK) {
840 Util_CatchBZ2Error(bzerror);
841 goto cleanup;
842 }
Tim Peterse3228092002-11-09 04:21:44 +0000843
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000844 Py_INCREF(Py_None);
845 ret = Py_None;
846
847cleanup:
848 RELEASE_LOCK(self);
849 return ret;
850}
851
852PyDoc_STRVAR(BZ2File_writelines__doc__,
853"writelines(sequence_of_strings) -> None\n\
854\n\
855Write the sequence of strings to the file. Note that newlines are not\n\
856added. The sequence can be any iterable object producing strings. This is\n\
857equivalent to calling write() for each string.\n\
858");
859
860/* This is a hacked version of Python's fileobject.c:file_writelines(). */
861static PyObject *
862BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
863{
864#define CHUNKSIZE 1000
865 PyObject *list = NULL;
866 PyObject *iter = NULL;
867 PyObject *ret = NULL;
868 PyObject *line;
869 int i, j, index, len, islist;
870 int bzerror;
871
872 ACQUIRE_LOCK(self);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000873 switch (self->mode) {
874 case MODE_WRITE:
875 break;
876
877 case MODE_CLOSED:
878 PyErr_SetString(PyExc_ValueError,
879 "I/O operation on closed file");
880 goto error;
881
882 default:
883 PyErr_SetString(PyExc_IOError,
884 "file is not ready for writing");
885 goto error;
886 }
887
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000888 islist = PyList_Check(seq);
889 if (!islist) {
890 iter = PyObject_GetIter(seq);
891 if (iter == NULL) {
892 PyErr_SetString(PyExc_TypeError,
893 "writelines() requires an iterable argument");
894 goto error;
895 }
896 list = PyList_New(CHUNKSIZE);
897 if (list == NULL)
898 goto error;
899 }
900
901 /* Strategy: slurp CHUNKSIZE lines into a private list,
902 checking that they are all strings, then write that list
903 without holding the interpreter lock, then come back for more. */
904 for (index = 0; ; index += CHUNKSIZE) {
905 if (islist) {
906 Py_XDECREF(list);
907 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
908 if (list == NULL)
909 goto error;
910 j = PyList_GET_SIZE(list);
911 }
912 else {
913 for (j = 0; j < CHUNKSIZE; j++) {
914 line = PyIter_Next(iter);
915 if (line == NULL) {
916 if (PyErr_Occurred())
917 goto error;
918 break;
919 }
920 PyList_SetItem(list, j, line);
921 }
922 }
923 if (j == 0)
924 break;
925
Guido van Rossum522a6c62007-05-22 23:13:45 +0000926 /* Check that all entries are indeed byte strings. If not,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000927 apply the same rules as for file.write() and
928 convert the rets to strings. This is slow, but
929 seems to be the only way since all conversion APIs
930 could potentially execute Python code. */
931 for (i = 0; i < j; i++) {
932 PyObject *v = PyList_GET_ITEM(list, i);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000933 if (!PyBytes_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000934 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000936 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
937 PyErr_SetString(PyExc_TypeError,
938 "writelines() "
939 "argument must be "
940 "a sequence of "
Guido van Rossum522a6c62007-05-22 23:13:45 +0000941 "bytes objects");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000942 goto error;
943 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000944 line = PyBytes_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000945 len);
946 if (line == NULL)
947 goto error;
948 Py_DECREF(v);
949 PyList_SET_ITEM(list, i, line);
950 }
951 }
952
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000953 /* Since we are releasing the global lock, the
954 following code may *not* execute Python code. */
955 Py_BEGIN_ALLOW_THREADS
956 for (i = 0; i < j; i++) {
957 line = PyList_GET_ITEM(list, i);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000958 len = PyBytes_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000959 BZ2_bzWrite (&bzerror, self->fp,
Guido van Rossum522a6c62007-05-22 23:13:45 +0000960 PyBytes_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000961 if (bzerror != BZ_OK) {
962 Py_BLOCK_THREADS
963 Util_CatchBZ2Error(bzerror);
964 goto error;
965 }
966 }
967 Py_END_ALLOW_THREADS
968
969 if (j < CHUNKSIZE)
970 break;
971 }
972
973 Py_INCREF(Py_None);
974 ret = Py_None;
975
976 error:
977 RELEASE_LOCK(self);
978 Py_XDECREF(list);
979 Py_XDECREF(iter);
980 return ret;
981#undef CHUNKSIZE
982}
983
984PyDoc_STRVAR(BZ2File_seek__doc__,
985"seek(offset [, whence]) -> None\n\
986\n\
987Move to new file position. Argument offset is a byte count. Optional\n\
988argument whence defaults to 0 (offset from start of file, offset\n\
989should be >= 0); other values are 1 (move relative to current position,\n\
990positive or negative), and 2 (move relative to end of file, usually\n\
991negative, although many platforms allow seeking beyond the end of a file).\n\
992\n\
993Note that seeking of bz2 files is emulated, and depending on the parameters\n\
994the operation may be extremely slow.\n\
995");
996
997static PyObject *
998BZ2File_seek(BZ2FileObject *self, PyObject *args)
999{
1000 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001001 PyObject *offobj;
1002 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001003 char small_buffer[SMALLCHUNK];
1004 char *buffer = small_buffer;
1005 size_t buffersize = SMALLCHUNK;
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001006 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +00001007 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001008 int chunksize;
1009 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001010 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001011
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001012 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1013 return NULL;
1014#if !defined(HAVE_LARGEFILE_SUPPORT)
1015 offset = PyInt_AsLong(offobj);
1016#else
1017 offset = PyLong_Check(offobj) ?
1018 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1019#endif
1020 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001021 return NULL;
1022
1023 ACQUIRE_LOCK(self);
1024 Util_DropReadAhead(self);
1025 switch (self->mode) {
1026 case MODE_READ:
1027 case MODE_READ_EOF:
1028 break;
Tim Peterse3228092002-11-09 04:21:44 +00001029
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001030 case MODE_CLOSED:
1031 PyErr_SetString(PyExc_ValueError,
1032 "I/O operation on closed file");
Thomas Wouters89f507f2006-12-13 04:49:30 +00001033 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001034
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001035 default:
1036 PyErr_SetString(PyExc_IOError,
1037 "seek works only while reading");
Thomas Wouters89f507f2006-12-13 04:49:30 +00001038 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001039 }
1040
Georg Brandl47fab922006-02-18 21:57:25 +00001041 if (where == 2) {
1042 if (self->size == -1) {
1043 assert(self->mode != MODE_READ_EOF);
1044 for (;;) {
1045 Py_BEGIN_ALLOW_THREADS
1046 chunksize = Util_UnivNewlineRead(
1047 &bzerror, self->fp,
1048 buffer, buffersize,
1049 self);
1050 self->pos += chunksize;
1051 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001052
Georg Brandl47fab922006-02-18 21:57:25 +00001053 bytesread += chunksize;
1054 if (bzerror == BZ_STREAM_END) {
1055 break;
1056 } else if (bzerror != BZ_OK) {
1057 Util_CatchBZ2Error(bzerror);
1058 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001059 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001060 }
Georg Brandl47fab922006-02-18 21:57:25 +00001061 self->mode = MODE_READ_EOF;
1062 self->size = self->pos;
1063 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001064 }
Georg Brandl47fab922006-02-18 21:57:25 +00001065 offset = self->size + offset;
1066 } else if (where == 1) {
1067 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001068 }
1069
Guido van Rossum522a6c62007-05-22 23:13:45 +00001070 /* Before getting here, offset must be the absolute position the file
Georg Brandl47fab922006-02-18 21:57:25 +00001071 * pointer should be set to. */
1072
1073 if (offset >= self->pos) {
1074 /* we can move forward */
1075 offset -= self->pos;
1076 } else {
1077 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001078 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001079 if (bzerror != BZ_OK) {
1080 Util_CatchBZ2Error(bzerror);
1081 goto cleanup;
1082 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001083 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001084 if (!ret)
1085 goto cleanup;
1086 Py_DECREF(ret);
1087 ret = NULL;
1088 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001089 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001090 0, 0, NULL, 0);
1091 if (bzerror != BZ_OK) {
1092 Util_CatchBZ2Error(bzerror);
1093 goto cleanup;
1094 }
1095 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001096 }
1097
Georg Brandl47fab922006-02-18 21:57:25 +00001098 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001099 goto exit;
1100
1101 /* Before getting here, offset must be set to the number of bytes
1102 * to walk forward. */
1103 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001104 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001105 readsize = buffersize;
1106 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001107 /* offset might be wider that readsize, but the result
1108 * of the subtraction is bound by buffersize (see the
1109 * condition above). buffersize is 8192. */
1110 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001111 Py_BEGIN_ALLOW_THREADS
1112 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1113 buffer, readsize, self);
1114 self->pos += chunksize;
1115 Py_END_ALLOW_THREADS
1116 bytesread += chunksize;
1117 if (bzerror == BZ_STREAM_END) {
1118 self->size = self->pos;
1119 self->mode = MODE_READ_EOF;
1120 break;
1121 } else if (bzerror != BZ_OK) {
1122 Util_CatchBZ2Error(bzerror);
1123 goto cleanup;
1124 }
1125 if (bytesread == offset)
1126 break;
1127 }
1128
1129exit:
1130 Py_INCREF(Py_None);
1131 ret = Py_None;
1132
1133cleanup:
1134 RELEASE_LOCK(self);
1135 return ret;
1136}
1137
1138PyDoc_STRVAR(BZ2File_tell__doc__,
1139"tell() -> int\n\
1140\n\
1141Return the current file position, an integer (may be a long integer).\n\
1142");
1143
1144static PyObject *
1145BZ2File_tell(BZ2FileObject *self, PyObject *args)
1146{
1147 PyObject *ret = NULL;
1148
1149 if (self->mode == MODE_CLOSED) {
1150 PyErr_SetString(PyExc_ValueError,
1151 "I/O operation on closed file");
1152 goto cleanup;
1153 }
1154
Georg Brandla8bcecc2005-09-03 07:49:53 +00001155#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001156 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001157#else
1158 ret = PyLong_FromLongLong(self->pos);
1159#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001160
1161cleanup:
1162 return ret;
1163}
1164
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001165PyDoc_STRVAR(BZ2File_close__doc__,
1166"close() -> None or (perhaps) an integer\n\
1167\n\
1168Close the file. Sets data attribute .closed to true. A closed file\n\
1169cannot be used for further I/O operations. close() may be called more\n\
1170than once without error.\n\
1171");
1172
1173static PyObject *
1174BZ2File_close(BZ2FileObject *self)
1175{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001176 PyObject *ret = NULL;
1177 int bzerror = BZ_OK;
1178
1179 ACQUIRE_LOCK(self);
1180 switch (self->mode) {
1181 case MODE_READ:
1182 case MODE_READ_EOF:
1183 BZ2_bzReadClose(&bzerror, self->fp);
1184 break;
1185 case MODE_WRITE:
1186 BZ2_bzWriteClose(&bzerror, self->fp,
1187 0, NULL, NULL);
1188 break;
1189 }
1190 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001191 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001192 if (bzerror != BZ_OK) {
1193 Util_CatchBZ2Error(bzerror);
1194 Py_XDECREF(ret);
1195 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001196 }
1197
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001198 RELEASE_LOCK(self);
1199 return ret;
1200}
1201
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001202static PyObject *BZ2File_getiter(BZ2FileObject *self);
1203
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001204static PyMethodDef BZ2File_methods[] = {
1205 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1206 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1207 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1208 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1209 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1210 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1211 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001212 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1213 {NULL, NULL} /* sentinel */
1214};
1215
1216
1217/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001218/* Getters and setters of BZ2File. */
1219
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001220/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1221static PyObject *
1222BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1223{
1224 switch (self->f_newlinetypes) {
1225 case NEWLINE_UNKNOWN:
1226 Py_INCREF(Py_None);
1227 return Py_None;
1228 case NEWLINE_CR:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001229 return PyBytes_FromStringAndSize("\r", 1);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001230 case NEWLINE_LF:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001231 return PyBytes_FromStringAndSize("\n", 1);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001232 case NEWLINE_CR|NEWLINE_LF:
1233 return Py_BuildValue("(ss)", "\r", "\n");
1234 case NEWLINE_CRLF:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001235 return PyBytes_FromStringAndSize("\r\n", 2);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001236 case NEWLINE_CR|NEWLINE_CRLF:
1237 return Py_BuildValue("(ss)", "\r", "\r\n");
1238 case NEWLINE_LF|NEWLINE_CRLF:
1239 return Py_BuildValue("(ss)", "\n", "\r\n");
1240 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1241 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1242 default:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001243 PyErr_Format(PyExc_SystemError,
1244 "Unknown newlines value 0x%x\n",
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001245 self->f_newlinetypes);
1246 return NULL;
1247 }
1248}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001249
1250static PyObject *
1251BZ2File_get_closed(BZ2FileObject *self, void *closure)
1252{
1253 return PyInt_FromLong(self->mode == MODE_CLOSED);
1254}
1255
1256static PyObject *
1257BZ2File_get_mode(BZ2FileObject *self, void *closure)
1258{
1259 return PyObject_GetAttrString(self->file, "mode");
1260}
1261
1262static PyObject *
1263BZ2File_get_name(BZ2FileObject *self, void *closure)
1264{
1265 return PyObject_GetAttrString(self->file, "name");
1266}
1267
1268static PyGetSetDef BZ2File_getset[] = {
1269 {"closed", (getter)BZ2File_get_closed, NULL,
1270 "True if the file is closed"},
Guido van Rossum522a6c62007-05-22 23:13:45 +00001271 {"newlines", (getter)BZ2File_get_newlines, NULL,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001272 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001273 {"mode", (getter)BZ2File_get_mode, NULL,
1274 "file mode ('r', 'w', or 'U')"},
1275 {"name", (getter)BZ2File_get_name, NULL,
1276 "file name"},
1277 {NULL} /* Sentinel */
1278};
1279
1280
1281/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001282/* Slot definitions for BZ2File_Type. */
1283
1284static int
1285BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1286{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001287 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001288 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001289 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001290 char *mode = "r";
1291 int buffering = -1;
1292 int compresslevel = 9;
1293 int bzerror;
1294 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001295
1296 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001297
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001298 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1299 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001300 &compresslevel))
1301 return -1;
1302
1303 if (compresslevel < 1 || compresslevel > 9) {
1304 PyErr_SetString(PyExc_ValueError,
1305 "compresslevel must be between 1 and 9");
1306 return -1;
1307 }
1308
1309 for (;;) {
1310 int error = 0;
1311 switch (*mode) {
1312 case 'r':
1313 case 'w':
1314 if (mode_char)
1315 error = 1;
1316 mode_char = *mode;
1317 break;
1318
1319 case 'b':
1320 break;
1321
1322 case 'U':
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001323#ifdef __VMS
1324 self->f_univ_newline = 0;
1325#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001326 self->f_univ_newline = 1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001327#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001328 break;
1329
1330 default:
1331 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001332 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001333 }
1334 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001335 PyErr_Format(PyExc_ValueError,
1336 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001337 return -1;
1338 }
1339 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001340 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001341 break;
1342 }
1343
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001344 if (mode_char == 0) {
1345 mode_char = 'r';
1346 }
1347
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001348 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001349
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001350 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1351 name, mode, buffering);
1352 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001353 return -1;
1354
1355 /* From now on, we have stuff to dealloc, so jump to error label
1356 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001357
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001358#ifdef WITH_THREAD
1359 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001360 if (!self->lock) {
1361 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001362 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001363 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001364#endif
1365
1366 if (mode_char == 'r')
1367 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001368 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001369 0, 0, NULL, 0);
1370 else
1371 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001372 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001373 compresslevel, 0, 0);
1374
1375 if (bzerror != BZ_OK) {
1376 Util_CatchBZ2Error(bzerror);
1377 goto error;
1378 }
1379
1380 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1381
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001382 return 0;
1383
1384error:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001385 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001386#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001387 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001388 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001389 self->lock = NULL;
1390 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001391#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001392 return -1;
1393}
1394
1395static void
1396BZ2File_dealloc(BZ2FileObject *self)
1397{
1398 int bzerror;
1399#ifdef WITH_THREAD
1400 if (self->lock)
1401 PyThread_free_lock(self->lock);
1402#endif
1403 switch (self->mode) {
1404 case MODE_READ:
1405 case MODE_READ_EOF:
1406 BZ2_bzReadClose(&bzerror, self->fp);
1407 break;
1408 case MODE_WRITE:
1409 BZ2_bzWriteClose(&bzerror, self->fp,
1410 0, NULL, NULL);
1411 break;
1412 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001413 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001414 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001415 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001416}
1417
1418/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1419static PyObject *
1420BZ2File_getiter(BZ2FileObject *self)
1421{
1422 if (self->mode == MODE_CLOSED) {
1423 PyErr_SetString(PyExc_ValueError,
1424 "I/O operation on closed file");
1425 return NULL;
1426 }
1427 Py_INCREF((PyObject*)self);
1428 return (PyObject *)self;
1429}
1430
1431/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1432#define READAHEAD_BUFSIZE 8192
1433static PyObject *
1434BZ2File_iternext(BZ2FileObject *self)
1435{
Guido van Rossum522a6c62007-05-22 23:13:45 +00001436 PyBytesObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001437 ACQUIRE_LOCK(self);
1438 if (self->mode == MODE_CLOSED) {
1439 PyErr_SetString(PyExc_ValueError,
1440 "I/O operation on closed file");
1441 return NULL;
1442 }
1443 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1444 RELEASE_LOCK(self);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001445 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001446 Py_XDECREF(ret);
1447 return NULL;
1448 }
1449 return (PyObject *)ret;
1450}
1451
1452/* ===================================================================== */
1453/* BZ2File_Type definition. */
1454
1455PyDoc_VAR(BZ2File__doc__) =
1456PyDoc_STR(
1457"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1458\n\
1459Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1460writing. When opened for writing, the file will be created if it doesn't\n\
1461exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1462unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1463is given, must be a number between 1 and 9.\n\
1464")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001465PyDoc_STR(
1466"\n\
1467Add a 'U' to mode to open the file for input with universal newline\n\
1468support. Any line ending in the input file will be seen as a '\\n' in\n\
1469Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1470for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1471'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1472newlines are available only when reading.\n\
1473")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001474;
1475
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001476static PyTypeObject BZ2File_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001477 PyObject_HEAD_INIT(NULL)
1478 0, /*ob_size*/
1479 "bz2.BZ2File", /*tp_name*/
1480 sizeof(BZ2FileObject), /*tp_basicsize*/
1481 0, /*tp_itemsize*/
1482 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1483 0, /*tp_print*/
1484 0, /*tp_getattr*/
1485 0, /*tp_setattr*/
1486 0, /*tp_compare*/
1487 0, /*tp_repr*/
1488 0, /*tp_as_number*/
1489 0, /*tp_as_sequence*/
1490 0, /*tp_as_mapping*/
1491 0, /*tp_hash*/
1492 0, /*tp_call*/
1493 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001494 PyObject_GenericGetAttr,/*tp_getattro*/
1495 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001496 0, /*tp_as_buffer*/
1497 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1498 BZ2File__doc__, /*tp_doc*/
1499 0, /*tp_traverse*/
1500 0, /*tp_clear*/
1501 0, /*tp_richcompare*/
1502 0, /*tp_weaklistoffset*/
1503 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1504 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1505 BZ2File_methods, /*tp_methods*/
Guido van Rossum79139b22007-02-09 23:20:19 +00001506 0, /*tp_members*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001507 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001508 0, /*tp_base*/
1509 0, /*tp_dict*/
1510 0, /*tp_descr_get*/
1511 0, /*tp_descr_set*/
1512 0, /*tp_dictoffset*/
1513 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001514 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001515 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001516 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001517 0, /*tp_is_gc*/
1518};
1519
1520
1521/* ===================================================================== */
1522/* Methods of BZ2Comp. */
1523
1524PyDoc_STRVAR(BZ2Comp_compress__doc__,
1525"compress(data) -> string\n\
1526\n\
1527Provide more data to the compressor object. It will return chunks of\n\
1528compressed data whenever possible. When you've finished providing data\n\
1529to compress, call the flush() method to finish the compression process,\n\
1530and return what is left in the internal buffers.\n\
1531");
1532
1533static PyObject *
1534BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1535{
1536 char *data;
1537 int datasize;
1538 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001539 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001540 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001541 bz_stream *bzs = &self->bzs;
1542 int bzerror;
1543
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001544 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001545 return NULL;
1546
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001547 if (datasize == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +00001548 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001549
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001550 ACQUIRE_LOCK(self);
1551 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001552 PyErr_SetString(PyExc_ValueError,
1553 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001554 goto error;
1555 }
1556
Guido van Rossum522a6c62007-05-22 23:13:45 +00001557 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001558 if (!ret)
1559 goto error;
1560
1561 bzs->next_in = data;
1562 bzs->avail_in = datasize;
1563 bzs->next_out = BUF(ret);
1564 bzs->avail_out = bufsize;
1565
1566 totalout = BZS_TOTAL_OUT(bzs);
1567
1568 for (;;) {
1569 Py_BEGIN_ALLOW_THREADS
1570 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1571 Py_END_ALLOW_THREADS
1572 if (bzerror != BZ_RUN_OK) {
1573 Util_CatchBZ2Error(bzerror);
1574 goto error;
1575 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001576 if (bzs->avail_in == 0)
1577 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001578 if (bzs->avail_out == 0) {
1579 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001580 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001581 BZ2_bzCompressEnd(bzs);
1582 goto error;
1583 }
1584 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1585 - totalout);
1586 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001587 }
1588 }
1589
Guido van Rossum522a6c62007-05-22 23:13:45 +00001590 if (PyBytes_Resize(ret,
1591 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1592 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001593
1594 RELEASE_LOCK(self);
1595 return ret;
1596
1597error:
1598 RELEASE_LOCK(self);
1599 Py_XDECREF(ret);
1600 return NULL;
1601}
1602
1603PyDoc_STRVAR(BZ2Comp_flush__doc__,
1604"flush() -> string\n\
1605\n\
1606Finish the compression process and return what is left in internal buffers.\n\
1607You must not use the compressor object after calling this method.\n\
1608");
1609
1610static PyObject *
1611BZ2Comp_flush(BZ2CompObject *self)
1612{
1613 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001614 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001615 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001616 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001617 int bzerror;
1618
1619 ACQUIRE_LOCK(self);
1620 if (!self->running) {
1621 PyErr_SetString(PyExc_ValueError, "object was already "
1622 "flushed");
1623 goto error;
1624 }
1625 self->running = 0;
1626
Guido van Rossum522a6c62007-05-22 23:13:45 +00001627 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001628 if (!ret)
1629 goto error;
1630
1631 bzs->next_out = BUF(ret);
1632 bzs->avail_out = bufsize;
1633
1634 totalout = BZS_TOTAL_OUT(bzs);
1635
1636 for (;;) {
1637 Py_BEGIN_ALLOW_THREADS
1638 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1639 Py_END_ALLOW_THREADS
1640 if (bzerror == BZ_STREAM_END) {
1641 break;
1642 } else if (bzerror != BZ_FINISH_OK) {
1643 Util_CatchBZ2Error(bzerror);
1644 goto error;
1645 }
1646 if (bzs->avail_out == 0) {
1647 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001648 if (PyBytes_Resize(ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001649 goto error;
1650 bzs->next_out = BUF(ret);
1651 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1652 - totalout);
1653 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1654 }
1655 }
1656
Guido van Rossum522a6c62007-05-22 23:13:45 +00001657 if (bzs->avail_out != 0) {
1658 if (PyBytes_Resize(ret,
1659 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1660 goto error;
1661 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001662
1663 RELEASE_LOCK(self);
1664 return ret;
1665
1666error:
1667 RELEASE_LOCK(self);
1668 Py_XDECREF(ret);
1669 return NULL;
1670}
1671
1672static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001673 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1674 BZ2Comp_compress__doc__},
1675 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1676 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001677 {NULL, NULL} /* sentinel */
1678};
1679
1680
1681/* ===================================================================== */
1682/* Slot definitions for BZ2Comp_Type. */
1683
1684static int
1685BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1686{
1687 int compresslevel = 9;
1688 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001689 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001690
1691 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1692 kwlist, &compresslevel))
1693 return -1;
1694
1695 if (compresslevel < 1 || compresslevel > 9) {
1696 PyErr_SetString(PyExc_ValueError,
1697 "compresslevel must be between 1 and 9");
1698 goto error;
1699 }
1700
1701#ifdef WITH_THREAD
1702 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001703 if (!self->lock) {
1704 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001705 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001706 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001707#endif
1708
1709 memset(&self->bzs, 0, sizeof(bz_stream));
1710 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1711 if (bzerror != BZ_OK) {
1712 Util_CatchBZ2Error(bzerror);
1713 goto error;
1714 }
1715
1716 self->running = 1;
1717
1718 return 0;
1719error:
1720#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001721 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001722 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001723 self->lock = NULL;
1724 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001725#endif
1726 return -1;
1727}
1728
1729static void
1730BZ2Comp_dealloc(BZ2CompObject *self)
1731{
1732#ifdef WITH_THREAD
1733 if (self->lock)
1734 PyThread_free_lock(self->lock);
1735#endif
1736 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001737 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001738}
1739
1740
1741/* ===================================================================== */
1742/* BZ2Comp_Type definition. */
1743
1744PyDoc_STRVAR(BZ2Comp__doc__,
1745"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1746\n\
1747Create a new compressor object. This object may be used to compress\n\
1748data sequentially. If you want to compress data in one shot, use the\n\
1749compress() function instead. The compresslevel parameter, if given,\n\
1750must be a number between 1 and 9.\n\
1751");
1752
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001753static PyTypeObject BZ2Comp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001754 PyObject_HEAD_INIT(NULL)
1755 0, /*ob_size*/
1756 "bz2.BZ2Compressor", /*tp_name*/
1757 sizeof(BZ2CompObject), /*tp_basicsize*/
1758 0, /*tp_itemsize*/
1759 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1760 0, /*tp_print*/
1761 0, /*tp_getattr*/
1762 0, /*tp_setattr*/
1763 0, /*tp_compare*/
1764 0, /*tp_repr*/
1765 0, /*tp_as_number*/
1766 0, /*tp_as_sequence*/
1767 0, /*tp_as_mapping*/
1768 0, /*tp_hash*/
1769 0, /*tp_call*/
1770 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001771 PyObject_GenericGetAttr,/*tp_getattro*/
1772 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001773 0, /*tp_as_buffer*/
1774 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1775 BZ2Comp__doc__, /*tp_doc*/
1776 0, /*tp_traverse*/
1777 0, /*tp_clear*/
1778 0, /*tp_richcompare*/
1779 0, /*tp_weaklistoffset*/
1780 0, /*tp_iter*/
1781 0, /*tp_iternext*/
1782 BZ2Comp_methods, /*tp_methods*/
1783 0, /*tp_members*/
1784 0, /*tp_getset*/
1785 0, /*tp_base*/
1786 0, /*tp_dict*/
1787 0, /*tp_descr_get*/
1788 0, /*tp_descr_set*/
1789 0, /*tp_dictoffset*/
1790 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001791 PyType_GenericAlloc, /*tp_alloc*/
1792 PyType_GenericNew, /*tp_new*/
1793 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001794 0, /*tp_is_gc*/
1795};
1796
1797
1798/* ===================================================================== */
1799/* Members of BZ2Decomp. */
1800
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001801#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001802#define OFF(x) offsetof(BZ2DecompObject, x)
1803
1804static PyMemberDef BZ2Decomp_members[] = {
1805 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1806 {NULL} /* Sentinel */
1807};
1808
1809
1810/* ===================================================================== */
1811/* Methods of BZ2Decomp. */
1812
1813PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1814"decompress(data) -> string\n\
1815\n\
1816Provide more data to the decompressor object. It will return chunks\n\
1817of decompressed data whenever possible. If you try to decompress data\n\
1818after the end of stream is found, EOFError will be raised. If any data\n\
1819was found after the end of stream, it'll be ignored and saved in\n\
1820unused_data attribute.\n\
1821");
1822
1823static PyObject *
1824BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1825{
1826 char *data;
1827 int datasize;
1828 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001829 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001830 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001831 bz_stream *bzs = &self->bzs;
1832 int bzerror;
1833
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001834 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001835 return NULL;
1836
1837 ACQUIRE_LOCK(self);
1838 if (!self->running) {
1839 PyErr_SetString(PyExc_EOFError, "end of stream was "
1840 "already found");
1841 goto error;
1842 }
1843
Guido van Rossum522a6c62007-05-22 23:13:45 +00001844 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001845 if (!ret)
1846 goto error;
1847
1848 bzs->next_in = data;
1849 bzs->avail_in = datasize;
1850 bzs->next_out = BUF(ret);
1851 bzs->avail_out = bufsize;
1852
1853 totalout = BZS_TOTAL_OUT(bzs);
1854
1855 for (;;) {
1856 Py_BEGIN_ALLOW_THREADS
1857 bzerror = BZ2_bzDecompress(bzs);
1858 Py_END_ALLOW_THREADS
1859 if (bzerror == BZ_STREAM_END) {
1860 if (bzs->avail_in != 0) {
1861 Py_DECREF(self->unused_data);
1862 self->unused_data =
Guido van Rossum522a6c62007-05-22 23:13:45 +00001863 PyBytes_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001864 bzs->avail_in);
1865 }
1866 self->running = 0;
1867 break;
1868 }
1869 if (bzerror != BZ_OK) {
1870 Util_CatchBZ2Error(bzerror);
1871 goto error;
1872 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001873 if (bzs->avail_in == 0)
1874 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001875 if (bzs->avail_out == 0) {
1876 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001877 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001878 BZ2_bzDecompressEnd(bzs);
1879 goto error;
1880 }
1881 bzs->next_out = BUF(ret);
1882 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1883 - totalout);
1884 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001885 }
1886 }
1887
Guido van Rossum522a6c62007-05-22 23:13:45 +00001888 if (bzs->avail_out != 0) {
1889 if (PyBytes_Resize(ret,
1890 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1891 goto error;
1892 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001893
1894 RELEASE_LOCK(self);
1895 return ret;
1896
1897error:
1898 RELEASE_LOCK(self);
1899 Py_XDECREF(ret);
1900 return NULL;
1901}
1902
1903static PyMethodDef BZ2Decomp_methods[] = {
1904 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1905 {NULL, NULL} /* sentinel */
1906};
1907
1908
1909/* ===================================================================== */
1910/* Slot definitions for BZ2Decomp_Type. */
1911
1912static int
1913BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1914{
1915 int bzerror;
1916
1917 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1918 return -1;
1919
1920#ifdef WITH_THREAD
1921 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001922 if (!self->lock) {
1923 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001924 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001925 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001926#endif
1927
Guido van Rossum522a6c62007-05-22 23:13:45 +00001928 self->unused_data = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001929 if (!self->unused_data)
1930 goto error;
1931
1932 memset(&self->bzs, 0, sizeof(bz_stream));
1933 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1934 if (bzerror != BZ_OK) {
1935 Util_CatchBZ2Error(bzerror);
1936 goto error;
1937 }
1938
1939 self->running = 1;
1940
1941 return 0;
1942
1943error:
1944#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001945 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001946 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001947 self->lock = NULL;
1948 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001949#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001950 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001951 return -1;
1952}
1953
1954static void
1955BZ2Decomp_dealloc(BZ2DecompObject *self)
1956{
1957#ifdef WITH_THREAD
1958 if (self->lock)
1959 PyThread_free_lock(self->lock);
1960#endif
1961 Py_XDECREF(self->unused_data);
1962 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001963 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001964}
1965
1966
1967/* ===================================================================== */
1968/* BZ2Decomp_Type definition. */
1969
1970PyDoc_STRVAR(BZ2Decomp__doc__,
1971"BZ2Decompressor() -> decompressor object\n\
1972\n\
1973Create a new decompressor object. This object may be used to decompress\n\
1974data sequentially. If you want to decompress data in one shot, use the\n\
1975decompress() function instead.\n\
1976");
1977
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001978static PyTypeObject BZ2Decomp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001979 PyObject_HEAD_INIT(NULL)
1980 0, /*ob_size*/
1981 "bz2.BZ2Decompressor", /*tp_name*/
1982 sizeof(BZ2DecompObject), /*tp_basicsize*/
1983 0, /*tp_itemsize*/
1984 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1985 0, /*tp_print*/
1986 0, /*tp_getattr*/
1987 0, /*tp_setattr*/
1988 0, /*tp_compare*/
1989 0, /*tp_repr*/
1990 0, /*tp_as_number*/
1991 0, /*tp_as_sequence*/
1992 0, /*tp_as_mapping*/
1993 0, /*tp_hash*/
1994 0, /*tp_call*/
1995 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001996 PyObject_GenericGetAttr,/*tp_getattro*/
1997 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001998 0, /*tp_as_buffer*/
1999 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2000 BZ2Decomp__doc__, /*tp_doc*/
2001 0, /*tp_traverse*/
2002 0, /*tp_clear*/
2003 0, /*tp_richcompare*/
2004 0, /*tp_weaklistoffset*/
2005 0, /*tp_iter*/
2006 0, /*tp_iternext*/
2007 BZ2Decomp_methods, /*tp_methods*/
2008 BZ2Decomp_members, /*tp_members*/
2009 0, /*tp_getset*/
2010 0, /*tp_base*/
2011 0, /*tp_dict*/
2012 0, /*tp_descr_get*/
2013 0, /*tp_descr_set*/
2014 0, /*tp_dictoffset*/
2015 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002016 PyType_GenericAlloc, /*tp_alloc*/
2017 PyType_GenericNew, /*tp_new*/
2018 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002019 0, /*tp_is_gc*/
2020};
2021
2022
2023/* ===================================================================== */
2024/* Module functions. */
2025
2026PyDoc_STRVAR(bz2_compress__doc__,
2027"compress(data [, compresslevel=9]) -> string\n\
2028\n\
2029Compress data in one shot. If you want to compress data sequentially,\n\
2030use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2031given, must be a number between 1 and 9.\n\
2032");
2033
2034static PyObject *
2035bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2036{
2037 int compresslevel=9;
2038 char *data;
2039 int datasize;
2040 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002041 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002042 bz_stream _bzs;
2043 bz_stream *bzs = &_bzs;
2044 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002045 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002046
2047 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2048 kwlist, &data, &datasize,
2049 &compresslevel))
2050 return NULL;
2051
2052 if (compresslevel < 1 || compresslevel > 9) {
2053 PyErr_SetString(PyExc_ValueError,
2054 "compresslevel must be between 1 and 9");
2055 return NULL;
2056 }
2057
2058 /* Conforming to bz2 manual, this is large enough to fit compressed
2059 * data in one shot. We will check it later anyway. */
2060 bufsize = datasize + (datasize/100+1) + 600;
2061
Guido van Rossum522a6c62007-05-22 23:13:45 +00002062 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002063 if (!ret)
2064 return NULL;
2065
2066 memset(bzs, 0, sizeof(bz_stream));
2067
2068 bzs->next_in = data;
2069 bzs->avail_in = datasize;
2070 bzs->next_out = BUF(ret);
2071 bzs->avail_out = bufsize;
2072
2073 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2074 if (bzerror != BZ_OK) {
2075 Util_CatchBZ2Error(bzerror);
2076 Py_DECREF(ret);
2077 return NULL;
2078 }
Tim Peterse3228092002-11-09 04:21:44 +00002079
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002080 for (;;) {
2081 Py_BEGIN_ALLOW_THREADS
2082 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2083 Py_END_ALLOW_THREADS
2084 if (bzerror == BZ_STREAM_END) {
2085 break;
2086 } else if (bzerror != BZ_FINISH_OK) {
2087 BZ2_bzCompressEnd(bzs);
2088 Util_CatchBZ2Error(bzerror);
2089 Py_DECREF(ret);
2090 return NULL;
2091 }
2092 if (bzs->avail_out == 0) {
2093 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00002094 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002095 BZ2_bzCompressEnd(bzs);
2096 Py_DECREF(ret);
2097 return NULL;
2098 }
2099 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2100 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2101 }
2102 }
2103
Guido van Rossum522a6c62007-05-22 23:13:45 +00002104 if (bzs->avail_out != 0) {
2105 if (PyBytes_Resize(ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2106 Py_DECREF(ret);
2107 ret = NULL;
2108 }
2109 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002110 BZ2_bzCompressEnd(bzs);
2111
2112 return ret;
2113}
2114
2115PyDoc_STRVAR(bz2_decompress__doc__,
2116"decompress(data) -> decompressed data\n\
2117\n\
2118Decompress data in one shot. If you want to decompress data sequentially,\n\
2119use an instance of BZ2Decompressor instead.\n\
2120");
2121
2122static PyObject *
2123bz2_decompress(PyObject *self, PyObject *args)
2124{
2125 char *data;
2126 int datasize;
2127 int bufsize = SMALLCHUNK;
2128 PyObject *ret;
2129 bz_stream _bzs;
2130 bz_stream *bzs = &_bzs;
2131 int bzerror;
2132
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002133 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002134 return NULL;
2135
2136 if (datasize == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +00002137 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002138
Guido van Rossum522a6c62007-05-22 23:13:45 +00002139 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002140 if (!ret)
2141 return NULL;
2142
2143 memset(bzs, 0, sizeof(bz_stream));
2144
2145 bzs->next_in = data;
2146 bzs->avail_in = datasize;
2147 bzs->next_out = BUF(ret);
2148 bzs->avail_out = bufsize;
2149
2150 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2151 if (bzerror != BZ_OK) {
2152 Util_CatchBZ2Error(bzerror);
2153 Py_DECREF(ret);
2154 return NULL;
2155 }
Tim Peterse3228092002-11-09 04:21:44 +00002156
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002157 for (;;) {
2158 Py_BEGIN_ALLOW_THREADS
2159 bzerror = BZ2_bzDecompress(bzs);
2160 Py_END_ALLOW_THREADS
2161 if (bzerror == BZ_STREAM_END) {
2162 break;
2163 } else if (bzerror != BZ_OK) {
2164 BZ2_bzDecompressEnd(bzs);
2165 Util_CatchBZ2Error(bzerror);
2166 Py_DECREF(ret);
2167 return NULL;
2168 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002169 if (bzs->avail_in == 0) {
2170 BZ2_bzDecompressEnd(bzs);
2171 PyErr_SetString(PyExc_ValueError,
2172 "couldn't find end of stream");
2173 Py_DECREF(ret);
2174 return NULL;
2175 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002176 if (bzs->avail_out == 0) {
2177 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00002178 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002179 BZ2_bzDecompressEnd(bzs);
2180 Py_DECREF(ret);
2181 return NULL;
2182 }
2183 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2184 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002185 }
2186 }
2187
Guido van Rossum522a6c62007-05-22 23:13:45 +00002188 if (bzs->avail_out != 0) {
2189 if (PyBytes_Resize(ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2190 Py_DECREF(ret);
2191 ret = NULL;
2192 }
2193 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002194 BZ2_bzDecompressEnd(bzs);
2195
2196 return ret;
2197}
2198
2199static PyMethodDef bz2_methods[] = {
2200 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2201 bz2_compress__doc__},
2202 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2203 bz2_decompress__doc__},
2204 {NULL, NULL} /* sentinel */
2205};
2206
2207/* ===================================================================== */
2208/* Initialization function. */
2209
2210PyDoc_STRVAR(bz2__doc__,
2211"The python bz2 module provides a comprehensive interface for\n\
2212the bz2 compression library. It implements a complete file\n\
2213interface, one shot (de)compression functions, and types for\n\
2214sequential (de)compression.\n\
2215");
2216
Neal Norwitz21d896c2003-07-01 20:15:21 +00002217PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002218initbz2(void)
2219{
2220 PyObject *m;
2221
2222 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002223 BZ2Comp_Type.ob_type = &PyType_Type;
2224 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002225
2226 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002227 if (m == NULL)
2228 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002229
2230 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2231
2232 Py_INCREF(&BZ2File_Type);
2233 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2234
2235 Py_INCREF(&BZ2Comp_Type);
2236 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2237
2238 Py_INCREF(&BZ2Decomp_Type);
2239 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2240}