blob: 029232c4e9eadd79af7a68ba61440d141cc54945 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Credit text for the module's author. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t is this module's file-offset type: 64 bits wide when the
   platform provides such a type.  The selection logic mirrors the one
   in Objects/fileobject.c.  Without large file support, or when off_t
   is already wide enough, plain off_t is used. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Shorthand for the raw character storage of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0   /* I/O attempts raise "I/O operation on closed file" */
#define MODE_READ     1   /* open for reading, more data may follow */
#define MODE_READ_EOF 2   /* open for reading, BZ_STREAM_END already seen */
#define MODE_WRITE    3   /* open for writing */

/* Exact type test against BZ2File_Type (subclasses do not match). */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
45
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZS_TOTAL_OUT(bzs) yields the total number of output bytes recorded in
   the bz_stream pointed to by 'bzs'.

   The macro argument and the whole expansion are parenthesized so that
   any pointer expression (e.g. BZS_TOTAL_OUT(&self->bzs)) expands
   correctly and the result can be used inside larger expressions. */
#ifdef BZ_CONFIG_ERROR

/* bzlib.h defines BZ_CONFIG_ERROR: the stream keeps the counter split
   into two 32-bit halves.  Combine them when a 64-bit integer type is
   available, otherwise only the low half can be reported. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* bzlib.h does not define BZ_CONFIG_ERROR (presumably an older libbz2
   whose entry points lack the BZ2_ prefix -- confirm against the bzlib
   version in use): map the prefixed names used in this file onto the
   unprefixed ones. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers.

   ACQUIRE_LOCK first tries to take the lock without blocking.  Only when
   the lock is contended does it wait, and then it releases the GIL for
   the duration of the wait.  Blocking on the object lock while holding
   the GIL can deadlock: the thread owning the object lock may have
   released the GIL around a bz2 library call and needs the GIL back
   before it can release the object lock. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Builds without thread support need no locking. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Flag bits accumulated in BZ2FileObject.f_newlinetypes. */
#define NEWLINE_UNKNOWN 0           /* no newline seen so far */
#define NEWLINE_CR      (1 << 0)    /* a lone \r was seen */
#define NEWLINE_LF      (1 << 1)    /* a lone \n was seen */
#define NEWLINE_CRLF    (1 << 2)    /* a \r\n pair was seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105 int f_univ_newline; /* Handle any newline convention */
106 int f_newlinetypes; /* Types of newlines seen */
107 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000108
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000109 BZFILE *fp;
110 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000111 Py_off_t pos;
112 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000113#ifdef WITH_THREAD
114 PyThread_type_lock lock;
115#endif
116} BZ2FileObject;
117
118typedef struct {
119 PyObject_HEAD
120 bz_stream bzs;
121 int running;
122#ifdef WITH_THREAD
123 PyThread_type_lock lock;
124#endif
125} BZ2CompObject;
126
127typedef struct {
128 PyObject_HEAD
129 bz_stream bzs;
130 int running;
131 PyObject *unused_data;
132#ifdef WITH_THREAD
133 PyThread_type_lock lock;
134#endif
135} BZ2DecompObject;
136
137/* ===================================================================== */
138/* Utility functions. */
139
140static int
141Util_CatchBZ2Error(int bzerror)
142{
143 int ret = 0;
144 switch(bzerror) {
145 case BZ_OK:
146 case BZ_STREAM_END:
147 break;
148
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000149#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000150 case BZ_CONFIG_ERROR:
151 PyErr_SetString(PyExc_SystemError,
152 "the bz2 library was not compiled "
153 "correctly");
154 ret = 1;
155 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000156#endif
Tim Peterse3228092002-11-09 04:21:44 +0000157
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000158 case BZ_PARAM_ERROR:
159 PyErr_SetString(PyExc_ValueError,
160 "the bz2 library has received wrong "
161 "parameters");
162 ret = 1;
163 break;
Tim Peterse3228092002-11-09 04:21:44 +0000164
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000165 case BZ_MEM_ERROR:
166 PyErr_NoMemory();
167 ret = 1;
168 break;
169
170 case BZ_DATA_ERROR:
171 case BZ_DATA_ERROR_MAGIC:
172 PyErr_SetString(PyExc_IOError, "invalid data stream");
173 ret = 1;
174 break;
175
176 case BZ_IO_ERROR:
177 PyErr_SetString(PyExc_IOError, "unknown IO error");
178 ret = 1;
179 break;
180
181 case BZ_UNEXPECTED_EOF:
182 PyErr_SetString(PyExc_EOFError,
183 "compressed file ended before the "
184 "logical end-of-stream was detected");
185 ret = 1;
186 break;
187
188 case BZ_SEQUENCE_ERROR:
189 PyErr_SetString(PyExc_RuntimeError,
190 "wrong sequence of bz2 library "
191 "commands used");
192 ret = 1;
193 break;
194 }
195 return ret;
196}
197
/* Step used while a buffer is still small: at least 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Step used once a buffer has become large. */
#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* Compute the next size for a growing read buffer (adapted from
 * fileobject.c:new_buffersize()).
 *
 * Up to SMALLCHUNK the buffer grows by SMALLCHUNK; above that it
 * doubles until it exceeds BIGCHUNK, after which it grows by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;     /* doubling phase */
    else
        step = BIGCHUNK;        /* linear phase */

    return currentsize + step;
}
224
225/* This is a hacked version of Python's fileobject.c:get_line(). */
226static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000227Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000228{
229 char c;
230 char *buf, *end;
231 size_t total_v_size; /* total # of slots in buffer */
232 size_t used_v_size; /* # used slots in buffer */
233 size_t increment; /* amount to increment the buffer */
234 PyObject *v;
235 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000236 int newlinetypes = f->f_newlinetypes;
237 int skipnextlf = f->f_skipnextlf;
238 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000239
240 total_v_size = n > 0 ? n : 100;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000241 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000242 if (v == NULL)
243 return NULL;
244
245 buf = BUF(v);
246 end = buf + total_v_size;
247
248 for (;;) {
249 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000250 if (univ_newline) {
251 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000252 BZ2_bzRead(&bzerror, f->fp, &c, 1);
253 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000254 if (bzerror != BZ_OK || buf == end)
255 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000256 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000257 skipnextlf = 0;
258 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000259 /* Seeing a \n here with
260 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000261 * saw a \r before.
262 */
263 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000264 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000265 &c, 1);
266 if (bzerror != BZ_OK)
267 break;
268 } else {
269 newlinetypes |= NEWLINE_CR;
270 }
271 }
272 if (c == '\r') {
273 skipnextlf = 1;
274 c = '\n';
275 } else if ( c == '\n')
276 newlinetypes |= NEWLINE_LF;
277 *buf++ = c;
278 if (c == '\n') break;
279 }
280 if (bzerror == BZ_STREAM_END && skipnextlf)
281 newlinetypes |= NEWLINE_CR;
282 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000283 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000284 BZ2_bzRead(&bzerror, f->fp, &c, 1);
285 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000286 *buf++ = c;
287 } while (bzerror == BZ_OK && c != '\n' && buf != end);
288 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000289 f->f_newlinetypes = newlinetypes;
290 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000291 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000292 f->size = f->pos;
293 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294 break;
295 } else if (bzerror != BZ_OK) {
296 Util_CatchBZ2Error(bzerror);
297 Py_DECREF(v);
298 return NULL;
299 }
300 if (c == '\n')
301 break;
302 /* Must be because buf == end */
303 if (n > 0)
304 break;
305 used_v_size = total_v_size;
306 increment = total_v_size >> 2; /* mild exponential growth */
307 total_v_size += increment;
308 if (total_v_size > INT_MAX) {
309 PyErr_SetString(PyExc_OverflowError,
310 "line is longer than a Python string can hold");
311 Py_DECREF(v);
312 return NULL;
313 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000314 if (PyBytes_Resize(v, total_v_size) < 0) {
315 Py_DECREF(v);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000316 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000317 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
320 }
321
322 used_v_size = buf - BUF(v);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000323 if (used_v_size != total_v_size) {
324 if (PyBytes_Resize(v, used_v_size) < 0) {
325 Py_DECREF(v);
326 v = NULL;
327 }
328 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000329 return v;
330}
331
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000332/* This is a hacked version of Python's
333 * fileobject.c:Py_UniversalNewlineFread(). */
334size_t
335Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000336 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000337{
338 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000339 int newlinetypes, skipnextlf;
340
341 assert(buf != NULL);
342 assert(stream != NULL);
343
344 if (!f->f_univ_newline)
345 return BZ2_bzRead(bzerror, stream, buf, n);
346
347 newlinetypes = f->f_newlinetypes;
348 skipnextlf = f->f_skipnextlf;
349
350 /* Invariant: n is the number of bytes remaining to be filled
351 * in the buffer.
352 */
353 while (n) {
354 size_t nread;
355 int shortread;
356 char *src = dst;
357
358 nread = BZ2_bzRead(bzerror, stream, dst, n);
359 assert(nread <= n);
360 n -= nread; /* assuming 1 byte out for each in; will adjust */
361 shortread = n != 0; /* true iff EOF or error */
362 while (nread--) {
363 char c = *src++;
364 if (c == '\r') {
365 /* Save as LF and set flag to skip next LF. */
366 *dst++ = '\n';
367 skipnextlf = 1;
368 }
369 else if (skipnextlf && c == '\n') {
370 /* Skip LF, and remember we saw CR LF. */
371 skipnextlf = 0;
372 newlinetypes |= NEWLINE_CRLF;
373 ++n;
374 }
375 else {
376 /* Normal char to be stored in buffer. Also
377 * update the newlinetypes flag if either this
378 * is an LF or the previous char was a CR.
379 */
380 if (c == '\n')
381 newlinetypes |= NEWLINE_LF;
382 else if (skipnextlf)
383 newlinetypes |= NEWLINE_CR;
384 *dst++ = c;
385 skipnextlf = 0;
386 }
387 }
388 if (shortread) {
389 /* If this is EOF, update type flags. */
390 if (skipnextlf && *bzerror == BZ_STREAM_END)
391 newlinetypes |= NEWLINE_CR;
392 break;
393 }
394 }
395 f->f_newlinetypes = newlinetypes;
396 f->f_skipnextlf = skipnextlf;
397 return dst - buf;
398}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399
400/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
401static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000402Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000404 if (f->f_buf != NULL) {
405 PyMem_Free(f->f_buf);
406 f->f_buf = NULL;
407 }
408}
409
410/* This is a hacked version of Python's fileobject.c:readahead(). */
411static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000412Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000413{
414 int chunksize;
415 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000416
417 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000418 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000419 return 0;
420 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000421 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000422 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000424 f->f_bufptr = f->f_buf;
425 f->f_bufend = f->f_buf;
426 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427 }
428 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
429 return -1;
430 }
431 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000432 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
433 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000434 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000435 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000436 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 f->size = f->pos;
438 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000439 } else if (bzerror != BZ_OK) {
440 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000441 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000442 return -1;
443 }
444 f->f_bufptr = f->f_buf;
445 f->f_bufend = f->f_buf + chunksize;
446 return 0;
447}
448
449/* This is a hacked version of Python's
450 * fileobject.c:readahead_get_line_skip(). */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000451static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000452Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000453{
Guido van Rossum522a6c62007-05-22 23:13:45 +0000454 PyBytesObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000455 char *bufptr;
456 char *buf;
457 int len;
458
459 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000460 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return NULL;
462
463 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000464 if (len == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +0000465 return (PyBytesObject *)
466 PyBytes_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 bufptr = memchr(f->f_bufptr, '\n', len);
468 if (bufptr != NULL) {
469 bufptr++; /* Count the '\n' */
470 len = bufptr - f->f_bufptr;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000471 s = (PyBytesObject *)
472 PyBytes_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000473 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000474 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000475 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000476 f->f_bufptr = bufptr;
477 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000478 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000479 } else {
480 bufptr = f->f_bufptr;
481 buf = f->f_buf;
482 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000483 s = Util_ReadAheadGetLineSkip(f, skip+len,
484 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000485 if (s == NULL) {
486 PyMem_Free(buf);
487 return NULL;
488 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000489 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000490 PyMem_Free(buf);
491 }
492 return s;
493}
494
495/* ===================================================================== */
496/* Methods of BZ2File. */
497
498PyDoc_STRVAR(BZ2File_read__doc__,
499"read([size]) -> string\n\
500\n\
501Read at most size uncompressed bytes, returned as a string. If the size\n\
502argument is negative or omitted, read until EOF is reached.\n\
503");
504
505/* This is a hacked version of Python's fileobject.c:file_read(). */
506static PyObject *
507BZ2File_read(BZ2FileObject *self, PyObject *args)
508{
509 long bytesrequested = -1;
510 size_t bytesread, buffersize, chunksize;
511 int bzerror;
512 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000513
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000514 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
515 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000516
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000517 ACQUIRE_LOCK(self);
518 switch (self->mode) {
519 case MODE_READ:
520 break;
521 case MODE_READ_EOF:
Guido van Rossum522a6c62007-05-22 23:13:45 +0000522 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000523 goto cleanup;
524 case MODE_CLOSED:
525 PyErr_SetString(PyExc_ValueError,
526 "I/O operation on closed file");
527 goto cleanup;
528 default:
529 PyErr_SetString(PyExc_IOError,
530 "file is not ready for reading");
531 goto cleanup;
532 }
533
534 if (bytesrequested < 0)
535 buffersize = Util_NewBufferSize((size_t)0);
536 else
537 buffersize = bytesrequested;
538 if (buffersize > INT_MAX) {
539 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000540 "requested number of bytes is "
541 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000542 goto cleanup;
543 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000544 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000545 if (ret == NULL)
546 goto cleanup;
547 bytesread = 0;
548
549 for (;;) {
550 Py_BEGIN_ALLOW_THREADS
551 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
552 BUF(ret)+bytesread,
553 buffersize-bytesread,
554 self);
555 self->pos += chunksize;
556 Py_END_ALLOW_THREADS
557 bytesread += chunksize;
558 if (bzerror == BZ_STREAM_END) {
559 self->size = self->pos;
560 self->mode = MODE_READ_EOF;
561 break;
562 } else if (bzerror != BZ_OK) {
563 Util_CatchBZ2Error(bzerror);
564 Py_DECREF(ret);
565 ret = NULL;
566 goto cleanup;
567 }
568 if (bytesrequested < 0) {
569 buffersize = Util_NewBufferSize(buffersize);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000570 if (PyBytes_Resize(ret, buffersize) < 0) {
571 Py_DECREF(ret);
572 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000573 goto cleanup;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000574 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000575 } else {
576 break;
577 }
578 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000579 if (bytesread != buffersize) {
580 if (PyBytes_Resize(ret, bytesread) < 0) {
581 Py_DECREF(ret);
582 ret = NULL;
583 }
584 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000585
586cleanup:
587 RELEASE_LOCK(self);
588 return ret;
589}
590
591PyDoc_STRVAR(BZ2File_readline__doc__,
592"readline([size]) -> string\n\
593\n\
594Return the next line from the file, as a string, retaining newline.\n\
595A non-negative size argument will limit the maximum number of bytes to\n\
596return (an incomplete line may be returned then). Return an empty\n\
597string at EOF.\n\
598");
599
600static PyObject *
601BZ2File_readline(BZ2FileObject *self, PyObject *args)
602{
603 PyObject *ret = NULL;
604 int sizehint = -1;
605
606 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
607 return NULL;
608
609 ACQUIRE_LOCK(self);
610 switch (self->mode) {
611 case MODE_READ:
612 break;
613 case MODE_READ_EOF:
Guido van Rossum522a6c62007-05-22 23:13:45 +0000614 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000615 goto cleanup;
616 case MODE_CLOSED:
617 PyErr_SetString(PyExc_ValueError,
618 "I/O operation on closed file");
619 goto cleanup;
620 default:
621 PyErr_SetString(PyExc_IOError,
622 "file is not ready for reading");
623 goto cleanup;
624 }
625
626 if (sizehint == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +0000627 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000628 else
629 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
630
631cleanup:
632 RELEASE_LOCK(self);
633 return ret;
634}
635
636PyDoc_STRVAR(BZ2File_readlines__doc__,
637"readlines([size]) -> list\n\
638\n\
639Call readline() repeatedly and return a list of lines read.\n\
640The optional size argument, if given, is an approximate bound on the\n\
641total number of bytes in the lines returned.\n\
642");
643
644/* This is a hacked version of Python's fileobject.c:file_readlines(). */
645static PyObject *
646BZ2File_readlines(BZ2FileObject *self, PyObject *args)
647{
648 long sizehint = 0;
649 PyObject *list = NULL;
650 PyObject *line;
651 char small_buffer[SMALLCHUNK];
652 char *buffer = small_buffer;
653 size_t buffersize = SMALLCHUNK;
654 PyObject *big_buffer = NULL;
655 size_t nfilled = 0;
656 size_t nread;
657 size_t totalread = 0;
658 char *p, *q, *end;
659 int err;
660 int shortread = 0;
661 int bzerror;
662
663 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
664 return NULL;
665
666 ACQUIRE_LOCK(self);
667 switch (self->mode) {
668 case MODE_READ:
669 break;
670 case MODE_READ_EOF:
671 list = PyList_New(0);
672 goto cleanup;
673 case MODE_CLOSED:
674 PyErr_SetString(PyExc_ValueError,
675 "I/O operation on closed file");
676 goto cleanup;
677 default:
678 PyErr_SetString(PyExc_IOError,
679 "file is not ready for reading");
680 goto cleanup;
681 }
682
683 if ((list = PyList_New(0)) == NULL)
684 goto cleanup;
685
686 for (;;) {
687 Py_BEGIN_ALLOW_THREADS
688 nread = Util_UnivNewlineRead(&bzerror, self->fp,
689 buffer+nfilled,
690 buffersize-nfilled, self);
691 self->pos += nread;
692 Py_END_ALLOW_THREADS
693 if (bzerror == BZ_STREAM_END) {
694 self->size = self->pos;
695 self->mode = MODE_READ_EOF;
696 if (nread == 0) {
697 sizehint = 0;
698 break;
699 }
700 shortread = 1;
701 } else if (bzerror != BZ_OK) {
702 Util_CatchBZ2Error(bzerror);
703 error:
704 Py_DECREF(list);
705 list = NULL;
706 goto cleanup;
707 }
708 totalread += nread;
709 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000710 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000711 /* Need a larger buffer to fit this line */
712 nfilled += nread;
713 buffersize *= 2;
714 if (buffersize > INT_MAX) {
715 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000716 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000717 goto error;
718 }
719 if (big_buffer == NULL) {
720 /* Create the big buffer */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000721 big_buffer = PyBytes_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000722 NULL, buffersize);
723 if (big_buffer == NULL)
724 goto error;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000725 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000726 memcpy(buffer, small_buffer, nfilled);
727 }
728 else {
729 /* Grow the big buffer */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000730 if (PyBytes_Resize(big_buffer, buffersize) < 0){
731 Py_DECREF(big_buffer);
732 big_buffer = NULL;
733 goto error;
734 }
735 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000736 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000737 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000738 }
739 end = buffer+nfilled+nread;
740 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000741 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000742 /* Process complete lines */
743 p++;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000744 line = PyBytes_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000745 if (line == NULL)
746 goto error;
747 err = PyList_Append(list, line);
748 Py_DECREF(line);
749 if (err != 0)
750 goto error;
751 q = p;
752 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000753 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000754 /* Move the remaining incomplete line to the start */
755 nfilled = end-q;
756 memmove(buffer, q, nfilled);
757 if (sizehint > 0)
758 if (totalread >= (size_t)sizehint)
759 break;
760 if (shortread) {
761 sizehint = 0;
762 break;
763 }
764 }
765 if (nfilled != 0) {
766 /* Partial last line */
Guido van Rossum522a6c62007-05-22 23:13:45 +0000767 line = PyBytes_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000768 if (line == NULL)
769 goto error;
770 if (sizehint > 0) {
771 /* Need to complete the last line */
772 PyObject *rest = Util_GetLine(self, 0);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000773 PyObject *new;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000774 if (rest == NULL) {
775 Py_DECREF(line);
776 goto error;
777 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000778 new = PyBytes_Concat(line, rest);
779 Py_DECREF(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000780 Py_DECREF(rest);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000781 line = new;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000782 if (line == NULL)
783 goto error;
784 }
785 err = PyList_Append(list, line);
786 Py_DECREF(line);
787 if (err != 0)
788 goto error;
789 }
790
791 cleanup:
792 RELEASE_LOCK(self);
793 if (big_buffer) {
794 Py_DECREF(big_buffer);
795 }
796 return list;
797}
798
799PyDoc_STRVAR(BZ2File_write__doc__,
800"write(data) -> None\n\
801\n\
802Write the 'data' string to file. Note that due to buffering, close() may\n\
803be needed before the file on disk reflects the data written.\n\
804");
805
806/* This is a hacked version of Python's fileobject.c:file_write(). */
807static PyObject *
808BZ2File_write(BZ2FileObject *self, PyObject *args)
809{
810 PyObject *ret = NULL;
811 char *buf;
812 int len;
813 int bzerror;
814
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000815 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000817
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000818 ACQUIRE_LOCK(self);
819 switch (self->mode) {
820 case MODE_WRITE:
821 break;
Tim Peterse3228092002-11-09 04:21:44 +0000822
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000823 case MODE_CLOSED:
824 PyErr_SetString(PyExc_ValueError,
825 "I/O operation on closed file");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000826 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000827
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000828 default:
829 PyErr_SetString(PyExc_IOError,
830 "file is not ready for writing");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000831 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000832 }
833
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000834 Py_BEGIN_ALLOW_THREADS
835 BZ2_bzWrite (&bzerror, self->fp, buf, len);
836 self->pos += len;
837 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000838
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000839 if (bzerror != BZ_OK) {
840 Util_CatchBZ2Error(bzerror);
841 goto cleanup;
842 }
Tim Peterse3228092002-11-09 04:21:44 +0000843
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000844 Py_INCREF(Py_None);
845 ret = Py_None;
846
847cleanup:
848 RELEASE_LOCK(self);
849 return ret;
850}
851
852PyDoc_STRVAR(BZ2File_writelines__doc__,
853"writelines(sequence_of_strings) -> None\n\
854\n\
855Write the sequence of strings to the file. Note that newlines are not\n\
856added. The sequence can be any iterable object producing strings. This is\n\
857equivalent to calling write() for each string.\n\
858");
859
860/* This is a hacked version of Python's fileobject.c:file_writelines(). */
861static PyObject *
862BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
863{
864#define CHUNKSIZE 1000
865 PyObject *list = NULL;
866 PyObject *iter = NULL;
867 PyObject *ret = NULL;
868 PyObject *line;
869 int i, j, index, len, islist;
870 int bzerror;
871
872 ACQUIRE_LOCK(self);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000873 switch (self->mode) {
874 case MODE_WRITE:
875 break;
876
877 case MODE_CLOSED:
878 PyErr_SetString(PyExc_ValueError,
879 "I/O operation on closed file");
880 goto error;
881
882 default:
883 PyErr_SetString(PyExc_IOError,
884 "file is not ready for writing");
885 goto error;
886 }
887
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000888 islist = PyList_Check(seq);
889 if (!islist) {
890 iter = PyObject_GetIter(seq);
891 if (iter == NULL) {
892 PyErr_SetString(PyExc_TypeError,
893 "writelines() requires an iterable argument");
894 goto error;
895 }
896 list = PyList_New(CHUNKSIZE);
897 if (list == NULL)
898 goto error;
899 }
900
901 /* Strategy: slurp CHUNKSIZE lines into a private list,
902 checking that they are all strings, then write that list
903 without holding the interpreter lock, then come back for more. */
904 for (index = 0; ; index += CHUNKSIZE) {
905 if (islist) {
906 Py_XDECREF(list);
907 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
908 if (list == NULL)
909 goto error;
910 j = PyList_GET_SIZE(list);
911 }
912 else {
913 for (j = 0; j < CHUNKSIZE; j++) {
914 line = PyIter_Next(iter);
915 if (line == NULL) {
916 if (PyErr_Occurred())
917 goto error;
918 break;
919 }
920 PyList_SetItem(list, j, line);
921 }
922 }
923 if (j == 0)
924 break;
925
Guido van Rossum522a6c62007-05-22 23:13:45 +0000926 /* Check that all entries are indeed byte strings. If not,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000927 apply the same rules as for file.write() and
928 convert the rets to strings. This is slow, but
929 seems to be the only way since all conversion APIs
930 could potentially execute Python code. */
931 for (i = 0; i < j; i++) {
932 PyObject *v = PyList_GET_ITEM(list, i);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000933 if (!PyBytes_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000934 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000936 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
937 PyErr_SetString(PyExc_TypeError,
938 "writelines() "
939 "argument must be "
940 "a sequence of "
Guido van Rossum522a6c62007-05-22 23:13:45 +0000941 "bytes objects");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000942 goto error;
943 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000944 line = PyBytes_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000945 len);
946 if (line == NULL)
947 goto error;
948 Py_DECREF(v);
949 PyList_SET_ITEM(list, i, line);
950 }
951 }
952
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000953 /* Since we are releasing the global lock, the
954 following code may *not* execute Python code. */
955 Py_BEGIN_ALLOW_THREADS
956 for (i = 0; i < j; i++) {
957 line = PyList_GET_ITEM(list, i);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000958 len = PyBytes_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000959 BZ2_bzWrite (&bzerror, self->fp,
Guido van Rossum522a6c62007-05-22 23:13:45 +0000960 PyBytes_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000961 if (bzerror != BZ_OK) {
962 Py_BLOCK_THREADS
963 Util_CatchBZ2Error(bzerror);
964 goto error;
965 }
966 }
967 Py_END_ALLOW_THREADS
968
969 if (j < CHUNKSIZE)
970 break;
971 }
972
973 Py_INCREF(Py_None);
974 ret = Py_None;
975
976 error:
977 RELEASE_LOCK(self);
978 Py_XDECREF(list);
979 Py_XDECREF(iter);
980 return ret;
981#undef CHUNKSIZE
982}
983
984PyDoc_STRVAR(BZ2File_seek__doc__,
985"seek(offset [, whence]) -> None\n\
986\n\
987Move to new file position. Argument offset is a byte count. Optional\n\
988argument whence defaults to 0 (offset from start of file, offset\n\
989should be >= 0); other values are 1 (move relative to current position,\n\
990positive or negative), and 2 (move relative to end of file, usually\n\
991negative, although many platforms allow seeking beyond the end of a file).\n\
992\n\
993Note that seeking of bz2 files is emulated, and depending on the parameters\n\
994the operation may be extremely slow.\n\
995");
996
997static PyObject *
998BZ2File_seek(BZ2FileObject *self, PyObject *args)
999{
1000 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001001 PyObject *offobj;
1002 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001003 char small_buffer[SMALLCHUNK];
1004 char *buffer = small_buffer;
1005 size_t buffersize = SMALLCHUNK;
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001006 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +00001007 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001008 int chunksize;
1009 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001010 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001011
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001012 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1013 return NULL;
1014#if !defined(HAVE_LARGEFILE_SUPPORT)
1015 offset = PyInt_AsLong(offobj);
1016#else
1017 offset = PyLong_Check(offobj) ?
1018 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1019#endif
1020 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001021 return NULL;
1022
1023 ACQUIRE_LOCK(self);
1024 Util_DropReadAhead(self);
1025 switch (self->mode) {
1026 case MODE_READ:
1027 case MODE_READ_EOF:
1028 break;
Tim Peterse3228092002-11-09 04:21:44 +00001029
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001030 case MODE_CLOSED:
1031 PyErr_SetString(PyExc_ValueError,
1032 "I/O operation on closed file");
Thomas Wouters89f507f2006-12-13 04:49:30 +00001033 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001034
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001035 default:
1036 PyErr_SetString(PyExc_IOError,
1037 "seek works only while reading");
Thomas Wouters89f507f2006-12-13 04:49:30 +00001038 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001039 }
1040
Georg Brandl47fab922006-02-18 21:57:25 +00001041 if (where == 2) {
1042 if (self->size == -1) {
1043 assert(self->mode != MODE_READ_EOF);
1044 for (;;) {
1045 Py_BEGIN_ALLOW_THREADS
1046 chunksize = Util_UnivNewlineRead(
1047 &bzerror, self->fp,
1048 buffer, buffersize,
1049 self);
1050 self->pos += chunksize;
1051 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001052
Georg Brandl47fab922006-02-18 21:57:25 +00001053 bytesread += chunksize;
1054 if (bzerror == BZ_STREAM_END) {
1055 break;
1056 } else if (bzerror != BZ_OK) {
1057 Util_CatchBZ2Error(bzerror);
1058 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001059 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001060 }
Georg Brandl47fab922006-02-18 21:57:25 +00001061 self->mode = MODE_READ_EOF;
1062 self->size = self->pos;
1063 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001064 }
Georg Brandl47fab922006-02-18 21:57:25 +00001065 offset = self->size + offset;
1066 } else if (where == 1) {
1067 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001068 }
1069
Guido van Rossum522a6c62007-05-22 23:13:45 +00001070 /* Before getting here, offset must be the absolute position the file
Georg Brandl47fab922006-02-18 21:57:25 +00001071 * pointer should be set to. */
1072
1073 if (offset >= self->pos) {
1074 /* we can move forward */
1075 offset -= self->pos;
1076 } else {
1077 /* we cannot move back, so rewind the stream */
Guido van Rossumda5b8f22007-06-12 23:30:11 +00001078 FILE *fp = NULL; /* XXX temporary!!! */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001079 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001080 if (bzerror != BZ_OK) {
1081 Util_CatchBZ2Error(bzerror);
1082 goto cleanup;
1083 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001084 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001085 if (!ret)
1086 goto cleanup;
1087 Py_DECREF(ret);
1088 ret = NULL;
1089 self->pos = 0;
Guido van Rossumda5b8f22007-06-12 23:30:11 +00001090 self->fp = BZ2_bzReadOpen(&bzerror, fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001091 0, 0, NULL, 0);
1092 if (bzerror != BZ_OK) {
1093 Util_CatchBZ2Error(bzerror);
1094 goto cleanup;
1095 }
1096 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001097 }
1098
Georg Brandl47fab922006-02-18 21:57:25 +00001099 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001100 goto exit;
1101
1102 /* Before getting here, offset must be set to the number of bytes
1103 * to walk forward. */
1104 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001105 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001106 readsize = buffersize;
1107 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001108 /* offset might be wider that readsize, but the result
1109 * of the subtraction is bound by buffersize (see the
1110 * condition above). buffersize is 8192. */
1111 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001112 Py_BEGIN_ALLOW_THREADS
1113 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1114 buffer, readsize, self);
1115 self->pos += chunksize;
1116 Py_END_ALLOW_THREADS
1117 bytesread += chunksize;
1118 if (bzerror == BZ_STREAM_END) {
1119 self->size = self->pos;
1120 self->mode = MODE_READ_EOF;
1121 break;
1122 } else if (bzerror != BZ_OK) {
1123 Util_CatchBZ2Error(bzerror);
1124 goto cleanup;
1125 }
1126 if (bytesread == offset)
1127 break;
1128 }
1129
1130exit:
1131 Py_INCREF(Py_None);
1132 ret = Py_None;
1133
1134cleanup:
1135 RELEASE_LOCK(self);
1136 return ret;
1137}
1138
1139PyDoc_STRVAR(BZ2File_tell__doc__,
1140"tell() -> int\n\
1141\n\
1142Return the current file position, an integer (may be a long integer).\n\
1143");
1144
1145static PyObject *
1146BZ2File_tell(BZ2FileObject *self, PyObject *args)
1147{
1148 PyObject *ret = NULL;
1149
1150 if (self->mode == MODE_CLOSED) {
1151 PyErr_SetString(PyExc_ValueError,
1152 "I/O operation on closed file");
1153 goto cleanup;
1154 }
1155
Georg Brandla8bcecc2005-09-03 07:49:53 +00001156#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001157 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001158#else
1159 ret = PyLong_FromLongLong(self->pos);
1160#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001161
1162cleanup:
1163 return ret;
1164}
1165
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001166PyDoc_STRVAR(BZ2File_close__doc__,
1167"close() -> None or (perhaps) an integer\n\
1168\n\
1169Close the file. Sets data attribute .closed to true. A closed file\n\
1170cannot be used for further I/O operations. close() may be called more\n\
1171than once without error.\n\
1172");
1173
1174static PyObject *
1175BZ2File_close(BZ2FileObject *self)
1176{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001177 PyObject *ret = NULL;
1178 int bzerror = BZ_OK;
1179
1180 ACQUIRE_LOCK(self);
1181 switch (self->mode) {
1182 case MODE_READ:
1183 case MODE_READ_EOF:
1184 BZ2_bzReadClose(&bzerror, self->fp);
1185 break;
1186 case MODE_WRITE:
1187 BZ2_bzWriteClose(&bzerror, self->fp,
1188 0, NULL, NULL);
1189 break;
1190 }
1191 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001192 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001193 if (bzerror != BZ_OK) {
1194 Util_CatchBZ2Error(bzerror);
1195 Py_XDECREF(ret);
1196 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001197 }
1198
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001199 RELEASE_LOCK(self);
1200 return ret;
1201}
1202
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001203static PyObject *BZ2File_getiter(BZ2FileObject *self);
1204
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001205static PyMethodDef BZ2File_methods[] = {
1206 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1207 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1208 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1209 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1210 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1211 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1212 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001213 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1214 {NULL, NULL} /* sentinel */
1215};
1216
1217
1218/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001219/* Getters and setters of BZ2File. */
1220
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001221/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1222static PyObject *
1223BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1224{
1225 switch (self->f_newlinetypes) {
1226 case NEWLINE_UNKNOWN:
1227 Py_INCREF(Py_None);
1228 return Py_None;
1229 case NEWLINE_CR:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001230 return PyBytes_FromStringAndSize("\r", 1);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001231 case NEWLINE_LF:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001232 return PyBytes_FromStringAndSize("\n", 1);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001233 case NEWLINE_CR|NEWLINE_LF:
1234 return Py_BuildValue("(ss)", "\r", "\n");
1235 case NEWLINE_CRLF:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001236 return PyBytes_FromStringAndSize("\r\n", 2);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001237 case NEWLINE_CR|NEWLINE_CRLF:
1238 return Py_BuildValue("(ss)", "\r", "\r\n");
1239 case NEWLINE_LF|NEWLINE_CRLF:
1240 return Py_BuildValue("(ss)", "\n", "\r\n");
1241 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1242 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1243 default:
Guido van Rossum522a6c62007-05-22 23:13:45 +00001244 PyErr_Format(PyExc_SystemError,
1245 "Unknown newlines value 0x%x\n",
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001246 self->f_newlinetypes);
1247 return NULL;
1248 }
1249}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001250
1251static PyObject *
1252BZ2File_get_closed(BZ2FileObject *self, void *closure)
1253{
1254 return PyInt_FromLong(self->mode == MODE_CLOSED);
1255}
1256
1257static PyObject *
1258BZ2File_get_mode(BZ2FileObject *self, void *closure)
1259{
1260 return PyObject_GetAttrString(self->file, "mode");
1261}
1262
1263static PyObject *
1264BZ2File_get_name(BZ2FileObject *self, void *closure)
1265{
1266 return PyObject_GetAttrString(self->file, "name");
1267}
1268
1269static PyGetSetDef BZ2File_getset[] = {
1270 {"closed", (getter)BZ2File_get_closed, NULL,
1271 "True if the file is closed"},
Guido van Rossum522a6c62007-05-22 23:13:45 +00001272 {"newlines", (getter)BZ2File_get_newlines, NULL,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001273 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001274 {"mode", (getter)BZ2File_get_mode, NULL,
1275 "file mode ('r', 'w', or 'U')"},
1276 {"name", (getter)BZ2File_get_name, NULL,
1277 "file name"},
1278 {NULL} /* Sentinel */
1279};
1280
1281
1282/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001283/* Slot definitions for BZ2File_Type. */
1284
1285static int
1286BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1287{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001288 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001289 "compresslevel", 0};
Guido van Rossumda5b8f22007-06-12 23:30:11 +00001290 FILE *fp = NULL; /* XXX temporary!!! */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001291 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001292 char *mode = "r";
1293 int buffering = -1;
1294 int compresslevel = 9;
1295 int bzerror;
1296 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001297
1298 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001299
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001300 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1301 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001302 &compresslevel))
1303 return -1;
1304
1305 if (compresslevel < 1 || compresslevel > 9) {
1306 PyErr_SetString(PyExc_ValueError,
1307 "compresslevel must be between 1 and 9");
1308 return -1;
1309 }
1310
1311 for (;;) {
1312 int error = 0;
1313 switch (*mode) {
1314 case 'r':
1315 case 'w':
1316 if (mode_char)
1317 error = 1;
1318 mode_char = *mode;
1319 break;
1320
1321 case 'b':
1322 break;
1323
1324 case 'U':
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001325#ifdef __VMS
1326 self->f_univ_newline = 0;
1327#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001328 self->f_univ_newline = 1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001329#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001330 break;
1331
1332 default:
1333 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001334 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001335 }
1336 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001337 PyErr_Format(PyExc_ValueError,
1338 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001339 return -1;
1340 }
1341 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001342 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001343 break;
1344 }
1345
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001346 if (mode_char == 0) {
1347 mode_char = 'r';
1348 }
1349
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001350 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001351
Guido van Rossumda5b8f22007-06-12 23:30:11 +00001352 self->file = NULL; /* XXX io.open(name, mode, buffering); */
1353 PyErr_SetString(PyExc_RuntimeError, "can't open bz2 files yet");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001354 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001355 return -1;
1356
1357 /* From now on, we have stuff to dealloc, so jump to error label
1358 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001359
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001360#ifdef WITH_THREAD
1361 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001362 if (!self->lock) {
1363 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001364 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001365 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001366#endif
1367
1368 if (mode_char == 'r')
1369 self->fp = BZ2_bzReadOpen(&bzerror,
Guido van Rossumda5b8f22007-06-12 23:30:11 +00001370 fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001371 0, 0, NULL, 0);
1372 else
1373 self->fp = BZ2_bzWriteOpen(&bzerror,
Guido van Rossumda5b8f22007-06-12 23:30:11 +00001374 fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001375 compresslevel, 0, 0);
1376
1377 if (bzerror != BZ_OK) {
1378 Util_CatchBZ2Error(bzerror);
1379 goto error;
1380 }
1381
1382 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1383
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001384 return 0;
1385
1386error:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001387 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001388#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001389 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001390 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001391 self->lock = NULL;
1392 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001393#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001394 return -1;
1395}
1396
1397static void
1398BZ2File_dealloc(BZ2FileObject *self)
1399{
1400 int bzerror;
1401#ifdef WITH_THREAD
1402 if (self->lock)
1403 PyThread_free_lock(self->lock);
1404#endif
1405 switch (self->mode) {
1406 case MODE_READ:
1407 case MODE_READ_EOF:
1408 BZ2_bzReadClose(&bzerror, self->fp);
1409 break;
1410 case MODE_WRITE:
1411 BZ2_bzWriteClose(&bzerror, self->fp,
1412 0, NULL, NULL);
1413 break;
1414 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001415 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001416 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001417 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001418}
1419
1420/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1421static PyObject *
1422BZ2File_getiter(BZ2FileObject *self)
1423{
1424 if (self->mode == MODE_CLOSED) {
1425 PyErr_SetString(PyExc_ValueError,
1426 "I/O operation on closed file");
1427 return NULL;
1428 }
1429 Py_INCREF((PyObject*)self);
1430 return (PyObject *)self;
1431}
1432
1433/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1434#define READAHEAD_BUFSIZE 8192
1435static PyObject *
1436BZ2File_iternext(BZ2FileObject *self)
1437{
Guido van Rossum522a6c62007-05-22 23:13:45 +00001438 PyBytesObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001439 ACQUIRE_LOCK(self);
1440 if (self->mode == MODE_CLOSED) {
1441 PyErr_SetString(PyExc_ValueError,
1442 "I/O operation on closed file");
1443 return NULL;
1444 }
1445 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1446 RELEASE_LOCK(self);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001447 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001448 Py_XDECREF(ret);
1449 return NULL;
1450 }
1451 return (PyObject *)ret;
1452}
1453
1454/* ===================================================================== */
1455/* BZ2File_Type definition. */
1456
1457PyDoc_VAR(BZ2File__doc__) =
1458PyDoc_STR(
1459"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1460\n\
1461Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1462writing. When opened for writing, the file will be created if it doesn't\n\
1463exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1464unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1465is given, must be a number between 1 and 9.\n\
1466")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001467PyDoc_STR(
1468"\n\
1469Add a 'U' to mode to open the file for input with universal newline\n\
1470support. Any line ending in the input file will be seen as a '\\n' in\n\
1471Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1472for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1473'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1474newlines are available only when reading.\n\
1475")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001476;
1477
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001478static PyTypeObject BZ2File_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001479 PyObject_HEAD_INIT(NULL)
1480 0, /*ob_size*/
1481 "bz2.BZ2File", /*tp_name*/
1482 sizeof(BZ2FileObject), /*tp_basicsize*/
1483 0, /*tp_itemsize*/
1484 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1485 0, /*tp_print*/
1486 0, /*tp_getattr*/
1487 0, /*tp_setattr*/
1488 0, /*tp_compare*/
1489 0, /*tp_repr*/
1490 0, /*tp_as_number*/
1491 0, /*tp_as_sequence*/
1492 0, /*tp_as_mapping*/
1493 0, /*tp_hash*/
1494 0, /*tp_call*/
1495 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001496 PyObject_GenericGetAttr,/*tp_getattro*/
1497 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001498 0, /*tp_as_buffer*/
1499 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1500 BZ2File__doc__, /*tp_doc*/
1501 0, /*tp_traverse*/
1502 0, /*tp_clear*/
1503 0, /*tp_richcompare*/
1504 0, /*tp_weaklistoffset*/
1505 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1506 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1507 BZ2File_methods, /*tp_methods*/
Guido van Rossum79139b22007-02-09 23:20:19 +00001508 0, /*tp_members*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001509 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001510 0, /*tp_base*/
1511 0, /*tp_dict*/
1512 0, /*tp_descr_get*/
1513 0, /*tp_descr_set*/
1514 0, /*tp_dictoffset*/
1515 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001516 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001517 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001518 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001519 0, /*tp_is_gc*/
1520};
1521
1522
1523/* ===================================================================== */
1524/* Methods of BZ2Comp. */
1525
1526PyDoc_STRVAR(BZ2Comp_compress__doc__,
1527"compress(data) -> string\n\
1528\n\
1529Provide more data to the compressor object. It will return chunks of\n\
1530compressed data whenever possible. When you've finished providing data\n\
1531to compress, call the flush() method to finish the compression process,\n\
1532and return what is left in the internal buffers.\n\
1533");
1534
1535static PyObject *
1536BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1537{
1538 char *data;
1539 int datasize;
1540 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001541 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001542 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001543 bz_stream *bzs = &self->bzs;
1544 int bzerror;
1545
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001546 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001547 return NULL;
1548
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001549 if (datasize == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +00001550 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001551
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001552 ACQUIRE_LOCK(self);
1553 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001554 PyErr_SetString(PyExc_ValueError,
1555 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001556 goto error;
1557 }
1558
Guido van Rossum522a6c62007-05-22 23:13:45 +00001559 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001560 if (!ret)
1561 goto error;
1562
1563 bzs->next_in = data;
1564 bzs->avail_in = datasize;
1565 bzs->next_out = BUF(ret);
1566 bzs->avail_out = bufsize;
1567
1568 totalout = BZS_TOTAL_OUT(bzs);
1569
1570 for (;;) {
1571 Py_BEGIN_ALLOW_THREADS
1572 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1573 Py_END_ALLOW_THREADS
1574 if (bzerror != BZ_RUN_OK) {
1575 Util_CatchBZ2Error(bzerror);
1576 goto error;
1577 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001578 if (bzs->avail_in == 0)
1579 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001580 if (bzs->avail_out == 0) {
1581 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001582 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001583 BZ2_bzCompressEnd(bzs);
1584 goto error;
1585 }
1586 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1587 - totalout);
1588 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001589 }
1590 }
1591
Guido van Rossum522a6c62007-05-22 23:13:45 +00001592 if (PyBytes_Resize(ret,
1593 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1594 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001595
1596 RELEASE_LOCK(self);
1597 return ret;
1598
1599error:
1600 RELEASE_LOCK(self);
1601 Py_XDECREF(ret);
1602 return NULL;
1603}
1604
1605PyDoc_STRVAR(BZ2Comp_flush__doc__,
1606"flush() -> string\n\
1607\n\
1608Finish the compression process and return what is left in internal buffers.\n\
1609You must not use the compressor object after calling this method.\n\
1610");
1611
1612static PyObject *
1613BZ2Comp_flush(BZ2CompObject *self)
1614{
1615 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001616 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001617 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001618 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001619 int bzerror;
1620
1621 ACQUIRE_LOCK(self);
1622 if (!self->running) {
1623 PyErr_SetString(PyExc_ValueError, "object was already "
1624 "flushed");
1625 goto error;
1626 }
1627 self->running = 0;
1628
Guido van Rossum522a6c62007-05-22 23:13:45 +00001629 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001630 if (!ret)
1631 goto error;
1632
1633 bzs->next_out = BUF(ret);
1634 bzs->avail_out = bufsize;
1635
1636 totalout = BZS_TOTAL_OUT(bzs);
1637
1638 for (;;) {
1639 Py_BEGIN_ALLOW_THREADS
1640 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1641 Py_END_ALLOW_THREADS
1642 if (bzerror == BZ_STREAM_END) {
1643 break;
1644 } else if (bzerror != BZ_FINISH_OK) {
1645 Util_CatchBZ2Error(bzerror);
1646 goto error;
1647 }
1648 if (bzs->avail_out == 0) {
1649 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001650 if (PyBytes_Resize(ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001651 goto error;
1652 bzs->next_out = BUF(ret);
1653 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1654 - totalout);
1655 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1656 }
1657 }
1658
Guido van Rossum522a6c62007-05-22 23:13:45 +00001659 if (bzs->avail_out != 0) {
1660 if (PyBytes_Resize(ret,
1661 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1662 goto error;
1663 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001664
1665 RELEASE_LOCK(self);
1666 return ret;
1667
1668error:
1669 RELEASE_LOCK(self);
1670 Py_XDECREF(ret);
1671 return NULL;
1672}
1673
1674static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001675 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1676 BZ2Comp_compress__doc__},
1677 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1678 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001679 {NULL, NULL} /* sentinel */
1680};
1681
1682
1683/* ===================================================================== */
1684/* Slot definitions for BZ2Comp_Type. */
1685
1686static int
1687BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1688{
1689 int compresslevel = 9;
1690 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001691 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001692
1693 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1694 kwlist, &compresslevel))
1695 return -1;
1696
1697 if (compresslevel < 1 || compresslevel > 9) {
1698 PyErr_SetString(PyExc_ValueError,
1699 "compresslevel must be between 1 and 9");
1700 goto error;
1701 }
1702
1703#ifdef WITH_THREAD
1704 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001705 if (!self->lock) {
1706 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001707 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001708 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001709#endif
1710
1711 memset(&self->bzs, 0, sizeof(bz_stream));
1712 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1713 if (bzerror != BZ_OK) {
1714 Util_CatchBZ2Error(bzerror);
1715 goto error;
1716 }
1717
1718 self->running = 1;
1719
1720 return 0;
1721error:
1722#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001723 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001724 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001725 self->lock = NULL;
1726 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001727#endif
1728 return -1;
1729}
1730
1731static void
1732BZ2Comp_dealloc(BZ2CompObject *self)
1733{
1734#ifdef WITH_THREAD
1735 if (self->lock)
1736 PyThread_free_lock(self->lock);
1737#endif
1738 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001739 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001740}
1741
1742
1743/* ===================================================================== */
1744/* BZ2Comp_Type definition. */
1745
1746PyDoc_STRVAR(BZ2Comp__doc__,
1747"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1748\n\
1749Create a new compressor object. This object may be used to compress\n\
1750data sequentially. If you want to compress data in one shot, use the\n\
1751compress() function instead. The compresslevel parameter, if given,\n\
1752must be a number between 1 and 9.\n\
1753");
1754
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001755static PyTypeObject BZ2Comp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001756 PyObject_HEAD_INIT(NULL)
1757 0, /*ob_size*/
1758 "bz2.BZ2Compressor", /*tp_name*/
1759 sizeof(BZ2CompObject), /*tp_basicsize*/
1760 0, /*tp_itemsize*/
1761 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1762 0, /*tp_print*/
1763 0, /*tp_getattr*/
1764 0, /*tp_setattr*/
1765 0, /*tp_compare*/
1766 0, /*tp_repr*/
1767 0, /*tp_as_number*/
1768 0, /*tp_as_sequence*/
1769 0, /*tp_as_mapping*/
1770 0, /*tp_hash*/
1771 0, /*tp_call*/
1772 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001773 PyObject_GenericGetAttr,/*tp_getattro*/
1774 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001775 0, /*tp_as_buffer*/
1776 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1777 BZ2Comp__doc__, /*tp_doc*/
1778 0, /*tp_traverse*/
1779 0, /*tp_clear*/
1780 0, /*tp_richcompare*/
1781 0, /*tp_weaklistoffset*/
1782 0, /*tp_iter*/
1783 0, /*tp_iternext*/
1784 BZ2Comp_methods, /*tp_methods*/
1785 0, /*tp_members*/
1786 0, /*tp_getset*/
1787 0, /*tp_base*/
1788 0, /*tp_dict*/
1789 0, /*tp_descr_get*/
1790 0, /*tp_descr_set*/
1791 0, /*tp_dictoffset*/
1792 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001793 PyType_GenericAlloc, /*tp_alloc*/
1794 PyType_GenericNew, /*tp_new*/
1795 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001796 0, /*tp_is_gc*/
1797};
1798
1799
1800/* ===================================================================== */
1801/* Members of BZ2Decomp. */
1802
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001803#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001804#define OFF(x) offsetof(BZ2DecompObject, x)
1805
1806static PyMemberDef BZ2Decomp_members[] = {
1807 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1808 {NULL} /* Sentinel */
1809};
1810
1811
1812/* ===================================================================== */
1813/* Methods of BZ2Decomp. */
1814
1815PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1816"decompress(data) -> string\n\
1817\n\
1818Provide more data to the decompressor object. It will return chunks\n\
1819of decompressed data whenever possible. If you try to decompress data\n\
1820after the end of stream is found, EOFError will be raised. If any data\n\
1821was found after the end of stream, it'll be ignored and saved in\n\
1822unused_data attribute.\n\
1823");
1824
1825static PyObject *
1826BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1827{
1828 char *data;
1829 int datasize;
1830 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001831 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001832 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001833 bz_stream *bzs = &self->bzs;
1834 int bzerror;
1835
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001836 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001837 return NULL;
1838
1839 ACQUIRE_LOCK(self);
1840 if (!self->running) {
1841 PyErr_SetString(PyExc_EOFError, "end of stream was "
1842 "already found");
1843 goto error;
1844 }
1845
Guido van Rossum522a6c62007-05-22 23:13:45 +00001846 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001847 if (!ret)
1848 goto error;
1849
1850 bzs->next_in = data;
1851 bzs->avail_in = datasize;
1852 bzs->next_out = BUF(ret);
1853 bzs->avail_out = bufsize;
1854
1855 totalout = BZS_TOTAL_OUT(bzs);
1856
1857 for (;;) {
1858 Py_BEGIN_ALLOW_THREADS
1859 bzerror = BZ2_bzDecompress(bzs);
1860 Py_END_ALLOW_THREADS
1861 if (bzerror == BZ_STREAM_END) {
1862 if (bzs->avail_in != 0) {
1863 Py_DECREF(self->unused_data);
1864 self->unused_data =
Guido van Rossum522a6c62007-05-22 23:13:45 +00001865 PyBytes_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001866 bzs->avail_in);
1867 }
1868 self->running = 0;
1869 break;
1870 }
1871 if (bzerror != BZ_OK) {
1872 Util_CatchBZ2Error(bzerror);
1873 goto error;
1874 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001875 if (bzs->avail_in == 0)
1876 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001877 if (bzs->avail_out == 0) {
1878 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00001879 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001880 BZ2_bzDecompressEnd(bzs);
1881 goto error;
1882 }
1883 bzs->next_out = BUF(ret);
1884 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1885 - totalout);
1886 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001887 }
1888 }
1889
Guido van Rossum522a6c62007-05-22 23:13:45 +00001890 if (bzs->avail_out != 0) {
1891 if (PyBytes_Resize(ret,
1892 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1893 goto error;
1894 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001895
1896 RELEASE_LOCK(self);
1897 return ret;
1898
1899error:
1900 RELEASE_LOCK(self);
1901 Py_XDECREF(ret);
1902 return NULL;
1903}
1904
1905static PyMethodDef BZ2Decomp_methods[] = {
1906 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1907 {NULL, NULL} /* sentinel */
1908};
1909
1910
1911/* ===================================================================== */
1912/* Slot definitions for BZ2Decomp_Type. */
1913
1914static int
1915BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1916{
1917 int bzerror;
1918
1919 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1920 return -1;
1921
1922#ifdef WITH_THREAD
1923 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001924 if (!self->lock) {
1925 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001926 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001927 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001928#endif
1929
Guido van Rossum522a6c62007-05-22 23:13:45 +00001930 self->unused_data = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001931 if (!self->unused_data)
1932 goto error;
1933
1934 memset(&self->bzs, 0, sizeof(bz_stream));
1935 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1936 if (bzerror != BZ_OK) {
1937 Util_CatchBZ2Error(bzerror);
1938 goto error;
1939 }
1940
1941 self->running = 1;
1942
1943 return 0;
1944
1945error:
1946#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001947 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001948 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001949 self->lock = NULL;
1950 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001951#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001952 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001953 return -1;
1954}
1955
1956static void
1957BZ2Decomp_dealloc(BZ2DecompObject *self)
1958{
1959#ifdef WITH_THREAD
1960 if (self->lock)
1961 PyThread_free_lock(self->lock);
1962#endif
1963 Py_XDECREF(self->unused_data);
1964 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001965 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001966}
1967
1968
1969/* ===================================================================== */
1970/* BZ2Decomp_Type definition. */
1971
1972PyDoc_STRVAR(BZ2Decomp__doc__,
1973"BZ2Decompressor() -> decompressor object\n\
1974\n\
1975Create a new decompressor object. This object may be used to decompress\n\
1976data sequentially. If you want to decompress data in one shot, use the\n\
1977decompress() function instead.\n\
1978");
1979
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001980static PyTypeObject BZ2Decomp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001981 PyObject_HEAD_INIT(NULL)
1982 0, /*ob_size*/
1983 "bz2.BZ2Decompressor", /*tp_name*/
1984 sizeof(BZ2DecompObject), /*tp_basicsize*/
1985 0, /*tp_itemsize*/
1986 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1987 0, /*tp_print*/
1988 0, /*tp_getattr*/
1989 0, /*tp_setattr*/
1990 0, /*tp_compare*/
1991 0, /*tp_repr*/
1992 0, /*tp_as_number*/
1993 0, /*tp_as_sequence*/
1994 0, /*tp_as_mapping*/
1995 0, /*tp_hash*/
1996 0, /*tp_call*/
1997 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001998 PyObject_GenericGetAttr,/*tp_getattro*/
1999 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002000 0, /*tp_as_buffer*/
2001 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2002 BZ2Decomp__doc__, /*tp_doc*/
2003 0, /*tp_traverse*/
2004 0, /*tp_clear*/
2005 0, /*tp_richcompare*/
2006 0, /*tp_weaklistoffset*/
2007 0, /*tp_iter*/
2008 0, /*tp_iternext*/
2009 BZ2Decomp_methods, /*tp_methods*/
2010 BZ2Decomp_members, /*tp_members*/
2011 0, /*tp_getset*/
2012 0, /*tp_base*/
2013 0, /*tp_dict*/
2014 0, /*tp_descr_get*/
2015 0, /*tp_descr_set*/
2016 0, /*tp_dictoffset*/
2017 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002018 PyType_GenericAlloc, /*tp_alloc*/
2019 PyType_GenericNew, /*tp_new*/
2020 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002021 0, /*tp_is_gc*/
2022};
2023
2024
2025/* ===================================================================== */
2026/* Module functions. */
2027
2028PyDoc_STRVAR(bz2_compress__doc__,
2029"compress(data [, compresslevel=9]) -> string\n\
2030\n\
2031Compress data in one shot. If you want to compress data sequentially,\n\
2032use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2033given, must be a number between 1 and 9.\n\
2034");
2035
2036static PyObject *
2037bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2038{
2039 int compresslevel=9;
2040 char *data;
2041 int datasize;
2042 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002043 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002044 bz_stream _bzs;
2045 bz_stream *bzs = &_bzs;
2046 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002047 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002048
2049 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2050 kwlist, &data, &datasize,
2051 &compresslevel))
2052 return NULL;
2053
2054 if (compresslevel < 1 || compresslevel > 9) {
2055 PyErr_SetString(PyExc_ValueError,
2056 "compresslevel must be between 1 and 9");
2057 return NULL;
2058 }
2059
2060 /* Conforming to bz2 manual, this is large enough to fit compressed
2061 * data in one shot. We will check it later anyway. */
2062 bufsize = datasize + (datasize/100+1) + 600;
2063
Guido van Rossum522a6c62007-05-22 23:13:45 +00002064 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002065 if (!ret)
2066 return NULL;
2067
2068 memset(bzs, 0, sizeof(bz_stream));
2069
2070 bzs->next_in = data;
2071 bzs->avail_in = datasize;
2072 bzs->next_out = BUF(ret);
2073 bzs->avail_out = bufsize;
2074
2075 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2076 if (bzerror != BZ_OK) {
2077 Util_CatchBZ2Error(bzerror);
2078 Py_DECREF(ret);
2079 return NULL;
2080 }
Tim Peterse3228092002-11-09 04:21:44 +00002081
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002082 for (;;) {
2083 Py_BEGIN_ALLOW_THREADS
2084 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2085 Py_END_ALLOW_THREADS
2086 if (bzerror == BZ_STREAM_END) {
2087 break;
2088 } else if (bzerror != BZ_FINISH_OK) {
2089 BZ2_bzCompressEnd(bzs);
2090 Util_CatchBZ2Error(bzerror);
2091 Py_DECREF(ret);
2092 return NULL;
2093 }
2094 if (bzs->avail_out == 0) {
2095 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00002096 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002097 BZ2_bzCompressEnd(bzs);
2098 Py_DECREF(ret);
2099 return NULL;
2100 }
2101 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2102 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2103 }
2104 }
2105
Guido van Rossum522a6c62007-05-22 23:13:45 +00002106 if (bzs->avail_out != 0) {
2107 if (PyBytes_Resize(ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2108 Py_DECREF(ret);
2109 ret = NULL;
2110 }
2111 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002112 BZ2_bzCompressEnd(bzs);
2113
2114 return ret;
2115}
2116
2117PyDoc_STRVAR(bz2_decompress__doc__,
2118"decompress(data) -> decompressed data\n\
2119\n\
2120Decompress data in one shot. If you want to decompress data sequentially,\n\
2121use an instance of BZ2Decompressor instead.\n\
2122");
2123
2124static PyObject *
2125bz2_decompress(PyObject *self, PyObject *args)
2126{
2127 char *data;
2128 int datasize;
2129 int bufsize = SMALLCHUNK;
2130 PyObject *ret;
2131 bz_stream _bzs;
2132 bz_stream *bzs = &_bzs;
2133 int bzerror;
2134
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002135 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002136 return NULL;
2137
2138 if (datasize == 0)
Guido van Rossum522a6c62007-05-22 23:13:45 +00002139 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002140
Guido van Rossum522a6c62007-05-22 23:13:45 +00002141 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002142 if (!ret)
2143 return NULL;
2144
2145 memset(bzs, 0, sizeof(bz_stream));
2146
2147 bzs->next_in = data;
2148 bzs->avail_in = datasize;
2149 bzs->next_out = BUF(ret);
2150 bzs->avail_out = bufsize;
2151
2152 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2153 if (bzerror != BZ_OK) {
2154 Util_CatchBZ2Error(bzerror);
2155 Py_DECREF(ret);
2156 return NULL;
2157 }
Tim Peterse3228092002-11-09 04:21:44 +00002158
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002159 for (;;) {
2160 Py_BEGIN_ALLOW_THREADS
2161 bzerror = BZ2_bzDecompress(bzs);
2162 Py_END_ALLOW_THREADS
2163 if (bzerror == BZ_STREAM_END) {
2164 break;
2165 } else if (bzerror != BZ_OK) {
2166 BZ2_bzDecompressEnd(bzs);
2167 Util_CatchBZ2Error(bzerror);
2168 Py_DECREF(ret);
2169 return NULL;
2170 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002171 if (bzs->avail_in == 0) {
2172 BZ2_bzDecompressEnd(bzs);
2173 PyErr_SetString(PyExc_ValueError,
2174 "couldn't find end of stream");
2175 Py_DECREF(ret);
2176 return NULL;
2177 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002178 if (bzs->avail_out == 0) {
2179 bufsize = Util_NewBufferSize(bufsize);
Guido van Rossum522a6c62007-05-22 23:13:45 +00002180 if (PyBytes_Resize(ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002181 BZ2_bzDecompressEnd(bzs);
2182 Py_DECREF(ret);
2183 return NULL;
2184 }
2185 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2186 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002187 }
2188 }
2189
Guido van Rossum522a6c62007-05-22 23:13:45 +00002190 if (bzs->avail_out != 0) {
2191 if (PyBytes_Resize(ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2192 Py_DECREF(ret);
2193 ret = NULL;
2194 }
2195 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002196 BZ2_bzDecompressEnd(bzs);
2197
2198 return ret;
2199}
2200
2201static PyMethodDef bz2_methods[] = {
2202 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2203 bz2_compress__doc__},
2204 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2205 bz2_decompress__doc__},
2206 {NULL, NULL} /* sentinel */
2207};
2208
2209/* ===================================================================== */
2210/* Initialization function. */
2211
2212PyDoc_STRVAR(bz2__doc__,
2213"The python bz2 module provides a comprehensive interface for\n\
2214the bz2 compression library. It implements a complete file\n\
2215interface, one shot (de)compression functions, and types for\n\
2216sequential (de)compression.\n\
2217");
2218
Neal Norwitz21d896c2003-07-01 20:15:21 +00002219PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002220initbz2(void)
2221{
2222 PyObject *m;
2223
2224 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002225 BZ2Comp_Type.ob_type = &PyType_Type;
2226 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002227
2228 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002229 if (m == NULL)
2230 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002231
2232 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2233
2234 Py_INCREF(&BZ2File_Type);
2235 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2236
2237 Py_INCREF(&BZ2Comp_Type);
2238 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2239
2240 Py_INCREF(&BZ2Decomp_Type);
2241 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2242}