blob: 0a367a77696196a4a32bb6ef3a39e0d5c21e8eb7 [file] [log] [blame]
/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* Our very own off_t-like type, 64-bit if possible.
 * Copied from Objects/fileobject.c.
 *
 * Without large file support plain off_t is used; with it, the first of
 * off_t / fpos_t that is at least 8 bytes wide is chosen, and the build
 * fails if neither qualifies. */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character storage of a Python string object.  The argument is
 * parenthesized so that arbitrary expressions can be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode.  MODE_READ_EOF is entered by the
 * read routines once the library reports BZ_STREAM_END. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type check (subclasses do not match). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZS_TOTAL_OUT(bzs) yields the total number of output bytes recorded in
 * the bz_stream pointed to by `bzs`.
 *
 * When the library header defines BZ_CONFIG_ERROR the counter is split
 * into two 32-bit halves, which are combined when a 64-bit integer type
 * is available.  Otherwise the BZ2_-prefixed entry points are mapped onto
 * un-prefixed names and a single total_out field is used.
 * NOTE(review): presumably the latter is the pre-1.0 libbz2 API -- confirm. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit type: only the low half can be reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers; `obj` must point to a structure with a
 * `lock` member.  Without WITH_THREAD both macros expand to nothing. */
#ifdef WITH_THREAD
/* Try a non-blocking acquire first; only when that fails is the blocking
 * acquire performed, bracketed by Py_BEGIN/END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR      1       /* \r newline seen */
#define NEWLINE_LF      2       /* \n newline seen */
#define NEWLINE_CRLF    4       /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
/* ===================================================================== */
/* Structure definitions. */
101
102typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000103 PyObject_HEAD
104 PyObject *file;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000110 int f_softspace; /* Flag used by 'print' command */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000111
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000112 int f_univ_newline; /* Handle any newline convention */
113 int f_newlinetypes; /* Types of newlines seen */
114 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000115
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000116 BZFILE *fp;
117 int mode;
118 Py_off_t pos;
119 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000120#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000121 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000122#endif
123} BZ2FileObject;
124
125typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000126 PyObject_HEAD
127 bz_stream bzs;
128 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000129#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000130 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000131#endif
132} BZ2CompObject;
133
134typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 PyObject_HEAD
136 bz_stream bzs;
137 int running;
138 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000139#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000140 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000141#endif
142} BZ2DecompObject;
143
/* ===================================================================== */
/* Utility functions. */
146
Antoine Pitrou39703012010-08-01 20:13:11 +0000147/* Refuse regular I/O if there's data in the iteration-buffer.
148 * Mixing them would cause data to arrive out of order, as the read*
149 * methods don't use the iteration buffer. */
150static int
151check_iterbuffered(BZ2FileObject *f)
152{
153 if (f->f_buf != NULL &&
154 (f->f_bufend - f->f_bufptr) > 0 &&
155 f->f_buf[0] != '\0') {
156 PyErr_SetString(PyExc_ValueError,
157 "Mixing iteration and read methods would lose data");
158 return -1;
159 }
160 return 0;
161}
162
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000163static int
164Util_CatchBZ2Error(int bzerror)
165{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000166 int ret = 0;
167 switch(bzerror) {
168 case BZ_OK:
169 case BZ_STREAM_END:
170 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000171
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000172#ifdef BZ_CONFIG_ERROR
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000173 case BZ_CONFIG_ERROR:
174 PyErr_SetString(PyExc_SystemError,
175 "the bz2 library was not compiled "
176 "correctly");
177 ret = 1;
178 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000179#endif
Tim Peterse3228092002-11-09 04:21:44 +0000180
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000181 case BZ_PARAM_ERROR:
182 PyErr_SetString(PyExc_ValueError,
183 "the bz2 library has received wrong "
184 "parameters");
185 ret = 1;
186 break;
Tim Peterse3228092002-11-09 04:21:44 +0000187
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000188 case BZ_MEM_ERROR:
189 PyErr_NoMemory();
190 ret = 1;
191 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000192
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000193 case BZ_DATA_ERROR:
194 case BZ_DATA_ERROR_MAGIC:
195 PyErr_SetString(PyExc_IOError, "invalid data stream");
196 ret = 1;
197 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000198
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000199 case BZ_IO_ERROR:
200 PyErr_SetString(PyExc_IOError, "unknown IO error");
201 ret = 1;
202 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000203
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000204 case BZ_UNEXPECTED_EOF:
205 PyErr_SetString(PyExc_EOFError,
206 "compressed file ended before the "
207 "logical end-of-stream was detected");
208 ret = 1;
209 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000210
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000211 case BZ_SEQUENCE_ERROR:
212 PyErr_SetString(PyExc_RuntimeError,
213 "wrong sequence of bz2 library "
214 "commands used");
215 ret = 1;
216 break;
217 }
218 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000219}
220
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow to:
 *   - up to SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;

    return currentsize + growth;
}
247
248/* This is a hacked version of Python's fileobject.c:get_line(). */
249static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000250Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000252 char c;
253 char *buf, *end;
254 size_t total_v_size; /* total # of slots in buffer */
255 size_t used_v_size; /* # used slots in buffer */
256 size_t increment; /* amount to increment the buffer */
257 PyObject *v;
258 int bzerror;
259 int bytes_read;
260 int newlinetypes = f->f_newlinetypes;
261 int skipnextlf = f->f_skipnextlf;
262 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000263
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000264 total_v_size = n > 0 ? n : 100;
265 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
266 if (v == NULL)
267 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000268
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000269 buf = BUF(v);
270 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000271
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000272 for (;;) {
273 Py_BEGIN_ALLOW_THREADS
274 while (buf != end) {
275 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276 f->pos++;
277 if (bytes_read == 0) break;
278 if (univ_newline) {
279 if (skipnextlf) {
280 skipnextlf = 0;
281 if (c == '\n') {
282 /* Seeing a \n here with skipnextlf true means we
283 * saw a \r before.
284 */
285 newlinetypes |= NEWLINE_CRLF;
286 if (bzerror != BZ_OK) break;
287 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
288 f->pos++;
289 if (bytes_read == 0) break;
290 } else {
291 newlinetypes |= NEWLINE_CR;
292 }
293 }
294 if (c == '\r') {
295 skipnextlf = 1;
296 c = '\n';
297 } else if (c == '\n')
298 newlinetypes |= NEWLINE_LF;
299 }
300 *buf++ = c;
301 if (bzerror != BZ_OK || c == '\n') break;
302 }
303 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
304 newlinetypes |= NEWLINE_CR;
305 Py_END_ALLOW_THREADS
306 f->f_newlinetypes = newlinetypes;
307 f->f_skipnextlf = skipnextlf;
308 if (bzerror == BZ_STREAM_END) {
309 f->size = f->pos;
310 f->mode = MODE_READ_EOF;
311 break;
312 } else if (bzerror != BZ_OK) {
313 Util_CatchBZ2Error(bzerror);
314 Py_DECREF(v);
315 return NULL;
316 }
317 if (c == '\n')
318 break;
319 /* Must be because buf == end */
320 if (n > 0)
321 break;
322 used_v_size = total_v_size;
323 increment = total_v_size >> 2; /* mild exponential growth */
324 total_v_size += increment;
325 if (total_v_size > INT_MAX) {
326 PyErr_SetString(PyExc_OverflowError,
327 "line is longer than a Python string can hold");
328 Py_DECREF(v);
329 return NULL;
330 }
331 if (_PyString_Resize(&v, total_v_size) < 0)
332 return NULL;
333 buf = BUF(v) + used_v_size;
334 end = BUF(v) + total_v_size;
335 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000336
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000337 used_v_size = buf - BUF(v);
338 if (used_v_size != total_v_size)
339 _PyString_Resize(&v, used_v_size);
340 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000341}
342
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000343/* This is a hacked version of Python's
344 * fileobject.c:Py_UniversalNewlineFread(). */
345size_t
346Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000347 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000348{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000349 char *dst = buf;
350 int newlinetypes, skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000351
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000352 assert(buf != NULL);
353 assert(stream != NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000354
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000355 if (!f->f_univ_newline)
356 return BZ2_bzRead(bzerror, stream, buf, n);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000357
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000358 newlinetypes = f->f_newlinetypes;
359 skipnextlf = f->f_skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000360
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000361 /* Invariant: n is the number of bytes remaining to be filled
362 * in the buffer.
363 */
364 while (n) {
365 size_t nread;
366 int shortread;
367 char *src = dst;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000368
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000369 nread = BZ2_bzRead(bzerror, stream, dst, n);
370 assert(nread <= n);
371 n -= nread; /* assuming 1 byte out for each in; will adjust */
372 shortread = n != 0; /* true iff EOF or error */
373 while (nread--) {
374 char c = *src++;
375 if (c == '\r') {
376 /* Save as LF and set flag to skip next LF. */
377 *dst++ = '\n';
378 skipnextlf = 1;
379 }
380 else if (skipnextlf && c == '\n') {
381 /* Skip LF, and remember we saw CR LF. */
382 skipnextlf = 0;
383 newlinetypes |= NEWLINE_CRLF;
384 ++n;
385 }
386 else {
387 /* Normal char to be stored in buffer. Also
388 * update the newlinetypes flag if either this
389 * is an LF or the previous char was a CR.
390 */
391 if (c == '\n')
392 newlinetypes |= NEWLINE_LF;
393 else if (skipnextlf)
394 newlinetypes |= NEWLINE_CR;
395 *dst++ = c;
396 skipnextlf = 0;
397 }
398 }
399 if (shortread) {
400 /* If this is EOF, update type flags. */
401 if (skipnextlf && *bzerror == BZ_STREAM_END)
402 newlinetypes |= NEWLINE_CR;
403 break;
404 }
405 }
406 f->f_newlinetypes = newlinetypes;
407 f->f_skipnextlf = skipnextlf;
408 return dst - buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000410
411/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
412static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000413Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000414{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000415 if (f->f_buf != NULL) {
416 PyMem_Free(f->f_buf);
417 f->f_buf = NULL;
418 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000419}
420
421/* This is a hacked version of Python's fileobject.c:readahead(). */
422static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000424{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000425 int chunksize;
426 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000428 if (f->f_buf != NULL) {
429 if((f->f_bufend - f->f_bufptr) >= 1)
430 return 0;
431 else
432 Util_DropReadAhead(f);
433 }
434 if (f->mode == MODE_READ_EOF) {
435 f->f_bufptr = f->f_buf;
436 f->f_bufend = f->f_buf;
437 return 0;
438 }
439 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
440 PyErr_NoMemory();
441 return -1;
442 }
443 Py_BEGIN_ALLOW_THREADS
444 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
445 bufsize, f);
446 Py_END_ALLOW_THREADS
447 f->pos += chunksize;
448 if (bzerror == BZ_STREAM_END) {
449 f->size = f->pos;
450 f->mode = MODE_READ_EOF;
451 } else if (bzerror != BZ_OK) {
452 Util_CatchBZ2Error(bzerror);
453 Util_DropReadAhead(f);
454 return -1;
455 }
456 f->f_bufptr = f->f_buf;
457 f->f_bufend = f->f_buf + chunksize;
458 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000459}
460
461/* This is a hacked version of Python's
462 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000463static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000464Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000465{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000466 PyStringObject* s;
467 char *bufptr;
468 char *buf;
469 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000471 if (f->f_buf == NULL)
472 if (Util_ReadAhead(f, bufsize) < 0)
473 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000474
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000475 len = f->f_bufend - f->f_bufptr;
476 if (len == 0)
477 return (PyStringObject *)
478 PyString_FromStringAndSize(NULL, skip);
479 bufptr = memchr(f->f_bufptr, '\n', len);
480 if (bufptr != NULL) {
481 bufptr++; /* Count the '\n' */
482 len = bufptr - f->f_bufptr;
483 s = (PyStringObject *)
484 PyString_FromStringAndSize(NULL, skip+len);
485 if (s == NULL)
486 return NULL;
487 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
488 f->f_bufptr = bufptr;
489 if (bufptr == f->f_bufend)
490 Util_DropReadAhead(f);
491 } else {
492 bufptr = f->f_bufptr;
493 buf = f->f_buf;
494 f->f_buf = NULL; /* Force new readahead buffer */
495 s = Util_ReadAheadGetLineSkip(f, skip+len,
496 bufsize + (bufsize>>2));
497 if (s == NULL) {
498 PyMem_Free(buf);
499 return NULL;
500 }
501 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
502 PyMem_Free(buf);
503 }
504 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000505}
506
/* ===================================================================== */
/* Methods of BZ2File. */
509
510PyDoc_STRVAR(BZ2File_read__doc__,
511"read([size]) -> string\n\
512\n\
513Read at most size uncompressed bytes, returned as a string. If the size\n\
514argument is negative or omitted, read until EOF is reached.\n\
515");
516
517/* This is a hacked version of Python's fileobject.c:file_read(). */
518static PyObject *
519BZ2File_read(BZ2FileObject *self, PyObject *args)
520{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000521 long bytesrequested = -1;
522 size_t bytesread, buffersize, chunksize;
523 int bzerror;
524 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000525
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000526 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
527 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000528
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000529 ACQUIRE_LOCK(self);
530 switch (self->mode) {
531 case MODE_READ:
532 break;
533 case MODE_READ_EOF:
534 ret = PyString_FromString("");
535 goto cleanup;
536 case MODE_CLOSED:
537 PyErr_SetString(PyExc_ValueError,
538 "I/O operation on closed file");
539 goto cleanup;
540 default:
541 PyErr_SetString(PyExc_IOError,
542 "file is not ready for reading");
543 goto cleanup;
544 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000545
Antoine Pitrou39703012010-08-01 20:13:11 +0000546 /* refuse to mix with f.next() */
547 if (check_iterbuffered(self))
548 goto cleanup;
549
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000550 if (bytesrequested < 0)
551 buffersize = Util_NewBufferSize((size_t)0);
552 else
553 buffersize = bytesrequested;
554 if (buffersize > INT_MAX) {
555 PyErr_SetString(PyExc_OverflowError,
556 "requested number of bytes is "
557 "more than a Python string can hold");
558 goto cleanup;
559 }
560 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
561 if (ret == NULL)
562 goto cleanup;
563 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000564
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000565 for (;;) {
566 Py_BEGIN_ALLOW_THREADS
567 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
568 BUF(ret)+bytesread,
569 buffersize-bytesread,
570 self);
571 self->pos += chunksize;
572 Py_END_ALLOW_THREADS
573 bytesread += chunksize;
574 if (bzerror == BZ_STREAM_END) {
575 self->size = self->pos;
576 self->mode = MODE_READ_EOF;
577 break;
578 } else if (bzerror != BZ_OK) {
579 Util_CatchBZ2Error(bzerror);
580 Py_DECREF(ret);
581 ret = NULL;
582 goto cleanup;
583 }
584 if (bytesrequested < 0) {
585 buffersize = Util_NewBufferSize(buffersize);
586 if (_PyString_Resize(&ret, buffersize) < 0)
587 goto cleanup;
588 } else {
589 break;
590 }
591 }
592 if (bytesread != buffersize)
593 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000594
595cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000596 RELEASE_LOCK(self);
597 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000598}
599
600PyDoc_STRVAR(BZ2File_readline__doc__,
601"readline([size]) -> string\n\
602\n\
603Return the next line from the file, as a string, retaining newline.\n\
604A non-negative size argument will limit the maximum number of bytes to\n\
605return (an incomplete line may be returned then). Return an empty\n\
606string at EOF.\n\
607");
608
609static PyObject *
610BZ2File_readline(BZ2FileObject *self, PyObject *args)
611{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000612 PyObject *ret = NULL;
613 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000614
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000615 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
616 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000617
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000618 ACQUIRE_LOCK(self);
619 switch (self->mode) {
620 case MODE_READ:
621 break;
622 case MODE_READ_EOF:
623 ret = PyString_FromString("");
624 goto cleanup;
625 case MODE_CLOSED:
626 PyErr_SetString(PyExc_ValueError,
627 "I/O operation on closed file");
628 goto cleanup;
629 default:
630 PyErr_SetString(PyExc_IOError,
631 "file is not ready for reading");
632 goto cleanup;
633 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000634
Antoine Pitrou39703012010-08-01 20:13:11 +0000635 /* refuse to mix with f.next() */
636 if (check_iterbuffered(self))
637 goto cleanup;
638
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000639 if (sizehint == 0)
640 ret = PyString_FromString("");
641 else
642 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000643
644cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000645 RELEASE_LOCK(self);
646 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000647}
648
649PyDoc_STRVAR(BZ2File_readlines__doc__,
650"readlines([size]) -> list\n\
651\n\
652Call readline() repeatedly and return a list of lines read.\n\
653The optional size argument, if given, is an approximate bound on the\n\
654total number of bytes in the lines returned.\n\
655");
656
657/* This is a hacked version of Python's fileobject.c:file_readlines(). */
658static PyObject *
659BZ2File_readlines(BZ2FileObject *self, PyObject *args)
660{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000661 long sizehint = 0;
662 PyObject *list = NULL;
663 PyObject *line;
664 char small_buffer[SMALLCHUNK];
665 char *buffer = small_buffer;
666 size_t buffersize = SMALLCHUNK;
667 PyObject *big_buffer = NULL;
668 size_t nfilled = 0;
669 size_t nread;
670 size_t totalread = 0;
671 char *p, *q, *end;
672 int err;
673 int shortread = 0;
674 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000675
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000676 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
677 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000678
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000679 ACQUIRE_LOCK(self);
680 switch (self->mode) {
681 case MODE_READ:
682 break;
683 case MODE_READ_EOF:
684 list = PyList_New(0);
685 goto cleanup;
686 case MODE_CLOSED:
687 PyErr_SetString(PyExc_ValueError,
688 "I/O operation on closed file");
689 goto cleanup;
690 default:
691 PyErr_SetString(PyExc_IOError,
692 "file is not ready for reading");
693 goto cleanup;
694 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695
Antoine Pitrou39703012010-08-01 20:13:11 +0000696 /* refuse to mix with f.next() */
697 if (check_iterbuffered(self))
698 goto cleanup;
699
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000700 if ((list = PyList_New(0)) == NULL)
701 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000702
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000703 for (;;) {
704 Py_BEGIN_ALLOW_THREADS
705 nread = Util_UnivNewlineRead(&bzerror, self->fp,
706 buffer+nfilled,
707 buffersize-nfilled, self);
708 self->pos += nread;
709 Py_END_ALLOW_THREADS
710 if (bzerror == BZ_STREAM_END) {
711 self->size = self->pos;
712 self->mode = MODE_READ_EOF;
713 if (nread == 0) {
714 sizehint = 0;
715 break;
716 }
717 shortread = 1;
718 } else if (bzerror != BZ_OK) {
719 Util_CatchBZ2Error(bzerror);
720 error:
721 Py_DECREF(list);
722 list = NULL;
723 goto cleanup;
724 }
725 totalread += nread;
726 p = memchr(buffer+nfilled, '\n', nread);
727 if (!shortread && p == NULL) {
728 /* Need a larger buffer to fit this line */
729 nfilled += nread;
730 buffersize *= 2;
731 if (buffersize > INT_MAX) {
732 PyErr_SetString(PyExc_OverflowError,
733 "line is longer than a Python string can hold");
734 goto error;
735 }
736 if (big_buffer == NULL) {
737 /* Create the big buffer */
738 big_buffer = PyString_FromStringAndSize(
739 NULL, buffersize);
740 if (big_buffer == NULL)
741 goto error;
742 buffer = PyString_AS_STRING(big_buffer);
743 memcpy(buffer, small_buffer, nfilled);
744 }
745 else {
746 /* Grow the big buffer */
747 _PyString_Resize(&big_buffer, buffersize);
748 buffer = PyString_AS_STRING(big_buffer);
749 }
750 continue;
751 }
752 end = buffer+nfilled+nread;
753 q = buffer;
754 while (p != NULL) {
755 /* Process complete lines */
756 p++;
757 line = PyString_FromStringAndSize(q, p-q);
758 if (line == NULL)
759 goto error;
760 err = PyList_Append(list, line);
761 Py_DECREF(line);
762 if (err != 0)
763 goto error;
764 q = p;
765 p = memchr(q, '\n', end-q);
766 }
767 /* Move the remaining incomplete line to the start */
768 nfilled = end-q;
769 memmove(buffer, q, nfilled);
770 if (sizehint > 0)
771 if (totalread >= (size_t)sizehint)
772 break;
773 if (shortread) {
774 sizehint = 0;
775 break;
776 }
777 }
778 if (nfilled != 0) {
779 /* Partial last line */
780 line = PyString_FromStringAndSize(buffer, nfilled);
781 if (line == NULL)
782 goto error;
783 if (sizehint > 0) {
784 /* Need to complete the last line */
785 PyObject *rest = Util_GetLine(self, 0);
786 if (rest == NULL) {
787 Py_DECREF(line);
788 goto error;
789 }
790 PyString_Concat(&line, rest);
791 Py_DECREF(rest);
792 if (line == NULL)
793 goto error;
794 }
795 err = PyList_Append(list, line);
796 Py_DECREF(line);
797 if (err != 0)
798 goto error;
799 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000800
801 cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000802 RELEASE_LOCK(self);
803 if (big_buffer) {
804 Py_DECREF(big_buffer);
805 }
806 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807}
808
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000809PyDoc_STRVAR(BZ2File_xreadlines__doc__,
810"xreadlines() -> self\n\
811\n\
812For backward compatibility. BZ2File objects now include the performance\n\
813optimizations previously implemented in the xreadlines module.\n\
814");
815
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816PyDoc_STRVAR(BZ2File_write__doc__,
817"write(data) -> None\n\
818\n\
819Write the 'data' string to file. Note that due to buffering, close() may\n\
820be needed before the file on disk reflects the data written.\n\
821");
822
823/* This is a hacked version of Python's fileobject.c:file_write(). */
824static PyObject *
825BZ2File_write(BZ2FileObject *self, PyObject *args)
826{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000827 PyObject *ret = NULL;
828 Py_buffer pbuf;
829 char *buf;
830 int len;
831 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000832
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000833 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
834 return NULL;
835 buf = pbuf.buf;
836 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000837
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000838 ACQUIRE_LOCK(self);
839 switch (self->mode) {
840 case MODE_WRITE:
841 break;
Tim Peterse3228092002-11-09 04:21:44 +0000842
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000843 case MODE_CLOSED:
844 PyErr_SetString(PyExc_ValueError,
845 "I/O operation on closed file");
846 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000847
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000848 default:
849 PyErr_SetString(PyExc_IOError,
850 "file is not ready for writing");
851 goto cleanup;
852 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000853
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000854 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000855
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000856 Py_BEGIN_ALLOW_THREADS
857 BZ2_bzWrite (&bzerror, self->fp, buf, len);
858 self->pos += len;
859 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000860
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000861 if (bzerror != BZ_OK) {
862 Util_CatchBZ2Error(bzerror);
863 goto cleanup;
864 }
Tim Peterse3228092002-11-09 04:21:44 +0000865
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000866 Py_INCREF(Py_None);
867 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000868
869cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000870 PyBuffer_Release(&pbuf);
871 RELEASE_LOCK(self);
872 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000873}
874
875PyDoc_STRVAR(BZ2File_writelines__doc__,
876"writelines(sequence_of_strings) -> None\n\
877\n\
878Write the sequence of strings to the file. Note that newlines are not\n\
879added. The sequence can be any iterable object producing strings. This is\n\
880equivalent to calling write() for each string.\n\
881");
882
883/* This is a hacked version of Python's fileobject.c:file_writelines(). */
884static PyObject *
885BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
886{
887#define CHUNKSIZE 1000
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000888 PyObject *list = NULL;
889 PyObject *iter = NULL;
890 PyObject *ret = NULL;
891 PyObject *line;
892 int i, j, index, len, islist;
893 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 ACQUIRE_LOCK(self);
896 switch (self->mode) {
897 case MODE_WRITE:
898 break;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000899
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000900 case MODE_CLOSED:
901 PyErr_SetString(PyExc_ValueError,
902 "I/O operation on closed file");
903 goto error;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000904
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000905 default:
906 PyErr_SetString(PyExc_IOError,
907 "file is not ready for writing");
908 goto error;
909 }
Georg Brandl3335a7a2006-08-14 21:42:55 +0000910
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000911 islist = PyList_Check(seq);
912 if (!islist) {
913 iter = PyObject_GetIter(seq);
914 if (iter == NULL) {
915 PyErr_SetString(PyExc_TypeError,
916 "writelines() requires an iterable argument");
917 goto error;
918 }
919 list = PyList_New(CHUNKSIZE);
920 if (list == NULL)
921 goto error;
922 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000923
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 /* Strategy: slurp CHUNKSIZE lines into a private list,
925 checking that they are all strings, then write that list
926 without holding the interpreter lock, then come back for more. */
927 for (index = 0; ; index += CHUNKSIZE) {
928 if (islist) {
929 Py_XDECREF(list);
930 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
931 if (list == NULL)
932 goto error;
933 j = PyList_GET_SIZE(list);
934 }
935 else {
936 for (j = 0; j < CHUNKSIZE; j++) {
937 line = PyIter_Next(iter);
938 if (line == NULL) {
939 if (PyErr_Occurred())
940 goto error;
941 break;
942 }
943 PyList_SetItem(list, j, line);
944 }
945 }
946 if (j == 0)
947 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000948
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000949 /* Check that all entries are indeed strings. If not,
950 apply the same rules as for file.write() and
951 convert the rets to strings. This is slow, but
952 seems to be the only way since all conversion APIs
953 could potentially execute Python code. */
954 for (i = 0; i < j; i++) {
955 PyObject *v = PyList_GET_ITEM(list, i);
956 if (!PyString_Check(v)) {
957 const char *buffer;
958 Py_ssize_t len;
959 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
960 PyErr_SetString(PyExc_TypeError,
961 "writelines() "
962 "argument must be "
963 "a sequence of "
964 "strings");
965 goto error;
966 }
967 line = PyString_FromStringAndSize(buffer,
968 len);
969 if (line == NULL)
970 goto error;
971 Py_DECREF(v);
972 PyList_SET_ITEM(list, i, line);
973 }
974 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000975
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000976 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000977
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000978 /* Since we are releasing the global lock, the
979 following code may *not* execute Python code. */
980 Py_BEGIN_ALLOW_THREADS
981 for (i = 0; i < j; i++) {
982 line = PyList_GET_ITEM(list, i);
983 len = PyString_GET_SIZE(line);
984 BZ2_bzWrite (&bzerror, self->fp,
985 PyString_AS_STRING(line), len);
986 if (bzerror != BZ_OK) {
987 Py_BLOCK_THREADS
988 Util_CatchBZ2Error(bzerror);
989 goto error;
990 }
991 }
992 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000993
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000994 if (j < CHUNKSIZE)
995 break;
996 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000997
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000998 Py_INCREF(Py_None);
999 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000
1001 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001002 RELEASE_LOCK(self);
1003 Py_XDECREF(list);
1004 Py_XDECREF(iter);
1005 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001006#undef CHUNKSIZE
1007}
1008
1009PyDoc_STRVAR(BZ2File_seek__doc__,
1010"seek(offset [, whence]) -> None\n\
1011\n\
1012Move to new file position. Argument offset is a byte count. Optional\n\
1013argument whence defaults to 0 (offset from start of file, offset\n\
1014should be >= 0); other values are 1 (move relative to current position,\n\
1015positive or negative), and 2 (move relative to end of file, usually\n\
1016negative, although many platforms allow seeking beyond the end of a file).\n\
1017\n\
1018Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1019the operation may be extremely slow.\n\
1020");
1021
1022static PyObject *
1023BZ2File_seek(BZ2FileObject *self, PyObject *args)
1024{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001025 int where = 0;
1026 PyObject *offobj;
1027 Py_off_t offset;
1028 char small_buffer[SMALLCHUNK];
1029 char *buffer = small_buffer;
1030 size_t buffersize = SMALLCHUNK;
1031 Py_off_t bytesread = 0;
1032 size_t readsize;
1033 int chunksize;
1034 int bzerror;
1035 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001037 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1038 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001039#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001040 offset = PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001041#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001042 offset = PyLong_Check(offobj) ?
1043 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001044#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001045 if (PyErr_Occurred())
1046 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001047
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001048 ACQUIRE_LOCK(self);
1049 Util_DropReadAhead(self);
1050 switch (self->mode) {
1051 case MODE_READ:
1052 case MODE_READ_EOF:
1053 break;
Tim Peterse3228092002-11-09 04:21:44 +00001054
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001055 case MODE_CLOSED:
1056 PyErr_SetString(PyExc_ValueError,
1057 "I/O operation on closed file");
1058 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001059
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001060 default:
1061 PyErr_SetString(PyExc_IOError,
1062 "seek works only while reading");
1063 goto cleanup;
1064 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001065
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001066 if (where == 2) {
1067 if (self->size == -1) {
1068 assert(self->mode != MODE_READ_EOF);
1069 for (;;) {
1070 Py_BEGIN_ALLOW_THREADS
1071 chunksize = Util_UnivNewlineRead(
1072 &bzerror, self->fp,
1073 buffer, buffersize,
1074 self);
1075 self->pos += chunksize;
1076 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001077
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001078 bytesread += chunksize;
1079 if (bzerror == BZ_STREAM_END) {
1080 break;
1081 } else if (bzerror != BZ_OK) {
1082 Util_CatchBZ2Error(bzerror);
1083 goto cleanup;
1084 }
1085 }
1086 self->mode = MODE_READ_EOF;
1087 self->size = self->pos;
1088 bytesread = 0;
1089 }
1090 offset = self->size + offset;
1091 } else if (where == 1) {
1092 offset = self->pos + offset;
1093 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001094
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001095 /* Before getting here, offset must be the absolute position the file
1096 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +00001097
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001098 if (offset >= self->pos) {
1099 /* we can move forward */
1100 offset -= self->pos;
1101 } else {
1102 /* we cannot move back, so rewind the stream */
1103 BZ2_bzReadClose(&bzerror, self->fp);
1104 if (self->fp) {
1105 PyFile_DecUseCount((PyFileObject *)self->file);
1106 self->fp = NULL;
1107 }
1108 if (bzerror != BZ_OK) {
1109 Util_CatchBZ2Error(bzerror);
1110 goto cleanup;
1111 }
1112 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1113 if (!ret)
1114 goto cleanup;
1115 Py_DECREF(ret);
1116 ret = NULL;
1117 self->pos = 0;
1118 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1119 0, 0, NULL, 0);
1120 if (self->fp)
1121 PyFile_IncUseCount((PyFileObject *)self->file);
1122 if (bzerror != BZ_OK) {
1123 Util_CatchBZ2Error(bzerror);
1124 goto cleanup;
1125 }
1126 self->mode = MODE_READ;
1127 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001128
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 if (offset <= 0 || self->mode == MODE_READ_EOF)
1130 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001131
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001132 /* Before getting here, offset must be set to the number of bytes
1133 * to walk forward. */
1134 for (;;) {
1135 if (offset-bytesread > buffersize)
1136 readsize = buffersize;
1137 else
1138 /* offset might be wider that readsize, but the result
1139 * of the subtraction is bound by buffersize (see the
1140 * condition above). buffersize is 8192. */
1141 readsize = (size_t)(offset-bytesread);
1142 Py_BEGIN_ALLOW_THREADS
1143 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1144 buffer, readsize, self);
1145 self->pos += chunksize;
1146 Py_END_ALLOW_THREADS
1147 bytesread += chunksize;
1148 if (bzerror == BZ_STREAM_END) {
1149 self->size = self->pos;
1150 self->mode = MODE_READ_EOF;
1151 break;
1152 } else if (bzerror != BZ_OK) {
1153 Util_CatchBZ2Error(bzerror);
1154 goto cleanup;
1155 }
1156 if (bytesread == offset)
1157 break;
1158 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001159
1160exit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 Py_INCREF(Py_None);
1162 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163
1164cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001165 RELEASE_LOCK(self);
1166 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001167}
1168
1169PyDoc_STRVAR(BZ2File_tell__doc__,
1170"tell() -> int\n\
1171\n\
1172Return the current file position, an integer (may be a long integer).\n\
1173");
1174
1175static PyObject *
1176BZ2File_tell(BZ2FileObject *self, PyObject *args)
1177{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001178 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001179
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001180 if (self->mode == MODE_CLOSED) {
1181 PyErr_SetString(PyExc_ValueError,
1182 "I/O operation on closed file");
1183 goto cleanup;
1184 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001185
Georg Brandla8bcecc2005-09-03 07:49:53 +00001186#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001187 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001188#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001190#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001191
1192cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001193 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001194}
1195
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001196PyDoc_STRVAR(BZ2File_close__doc__,
1197"close() -> None or (perhaps) an integer\n\
1198\n\
1199Close the file. Sets data attribute .closed to true. A closed file\n\
1200cannot be used for further I/O operations. close() may be called more\n\
1201than once without error.\n\
1202");
1203
1204static PyObject *
1205BZ2File_close(BZ2FileObject *self)
1206{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001207 PyObject *ret = NULL;
1208 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001209
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001210 ACQUIRE_LOCK(self);
1211 switch (self->mode) {
1212 case MODE_READ:
1213 case MODE_READ_EOF:
1214 BZ2_bzReadClose(&bzerror, self->fp);
1215 break;
1216 case MODE_WRITE:
1217 BZ2_bzWriteClose(&bzerror, self->fp,
1218 0, NULL, NULL);
1219 break;
1220 }
1221 if (self->fp) {
1222 PyFile_DecUseCount((PyFileObject *)self->file);
1223 self->fp = NULL;
1224 }
1225 self->mode = MODE_CLOSED;
1226 ret = PyObject_CallMethod(self->file, "close", NULL);
1227 if (bzerror != BZ_OK) {
1228 Util_CatchBZ2Error(bzerror);
1229 Py_XDECREF(ret);
1230 ret = NULL;
1231 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001232
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001233 RELEASE_LOCK(self);
1234 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001235}
1236
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001237PyDoc_STRVAR(BZ2File_enter_doc,
1238"__enter__() -> self.");
1239
1240static PyObject *
1241BZ2File_enter(BZ2FileObject *self)
1242{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001243 if (self->mode == MODE_CLOSED) {
1244 PyErr_SetString(PyExc_ValueError,
1245 "I/O operation on closed file");
1246 return NULL;
1247 }
1248 Py_INCREF(self);
1249 return (PyObject *) self;
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001250}
1251
1252PyDoc_STRVAR(BZ2File_exit_doc,
1253"__exit__(*excinfo) -> None. Closes the file.");
1254
1255static PyObject *
1256BZ2File_exit(BZ2FileObject *self, PyObject *args)
1257{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001258 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1259 if (!ret)
1260 /* If error occurred, pass through */
1261 return NULL;
1262 Py_DECREF(ret);
1263 Py_RETURN_NONE;
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001264}
1265
1266
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001267static PyObject *BZ2File_getiter(BZ2FileObject *self);
1268
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001269static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001270 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1271 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1272 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1273 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1274 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1275 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1276 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1277 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1278 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1279 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1280 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1281 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001282};
1283
1284
1285/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001286/* Getters and setters of BZ2File. */
1287
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001288/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1289static PyObject *
1290BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1291{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001292 switch (self->f_newlinetypes) {
1293 case NEWLINE_UNKNOWN:
1294 Py_INCREF(Py_None);
1295 return Py_None;
1296 case NEWLINE_CR:
1297 return PyString_FromString("\r");
1298 case NEWLINE_LF:
1299 return PyString_FromString("\n");
1300 case NEWLINE_CR|NEWLINE_LF:
1301 return Py_BuildValue("(ss)", "\r", "\n");
1302 case NEWLINE_CRLF:
1303 return PyString_FromString("\r\n");
1304 case NEWLINE_CR|NEWLINE_CRLF:
1305 return Py_BuildValue("(ss)", "\r", "\r\n");
1306 case NEWLINE_LF|NEWLINE_CRLF:
1307 return Py_BuildValue("(ss)", "\n", "\r\n");
1308 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1309 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1310 default:
1311 PyErr_Format(PyExc_SystemError,
1312 "Unknown newlines value 0x%x\n",
1313 self->f_newlinetypes);
1314 return NULL;
1315 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001316}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001317
1318static PyObject *
1319BZ2File_get_closed(BZ2FileObject *self, void *closure)
1320{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001321 return PyInt_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001322}
1323
1324static PyObject *
1325BZ2File_get_mode(BZ2FileObject *self, void *closure)
1326{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001327 return PyObject_GetAttrString(self->file, "mode");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001328}
1329
1330static PyObject *
1331BZ2File_get_name(BZ2FileObject *self, void *closure)
1332{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001333 return PyObject_GetAttrString(self->file, "name");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001334}
1335
1336static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001337 {"closed", (getter)BZ2File_get_closed, NULL,
1338 "True if the file is closed"},
1339 {"newlines", (getter)BZ2File_get_newlines, NULL,
1340 "end-of-line convention used in this file"},
1341 {"mode", (getter)BZ2File_get_mode, NULL,
1342 "file mode ('r', 'w', or 'U')"},
1343 {"name", (getter)BZ2File_get_name, NULL,
1344 "file name"},
1345 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001346};
1347
1348
1349/* ===================================================================== */
1350/* Members of BZ2File_Type. */
1351
1352#undef OFF
1353#define OFF(x) offsetof(BZ2FileObject, x)
1354
1355static PyMemberDef BZ2File_members[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001356 {"softspace", T_INT, OFF(f_softspace), 0,
1357 "flag indicating that a space needs to be printed; used by print"},
1358 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001359};
1360
1361/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001362/* Slot definitions for BZ2File_Type. */
1363
1364static int
1365BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1366{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001367 static char *kwlist[] = {"filename", "mode", "buffering",
1368 "compresslevel", 0};
1369 PyObject *name;
1370 char *mode = "r";
1371 int buffering = -1;
1372 int compresslevel = 9;
1373 int bzerror;
1374 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001375
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001376 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001377
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001378 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1379 kwlist, &name, &mode, &buffering,
1380 &compresslevel))
1381 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001382
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001383 if (compresslevel < 1 || compresslevel > 9) {
1384 PyErr_SetString(PyExc_ValueError,
1385 "compresslevel must be between 1 and 9");
1386 return -1;
1387 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001389 for (;;) {
1390 int error = 0;
1391 switch (*mode) {
1392 case 'r':
1393 case 'w':
1394 if (mode_char)
1395 error = 1;
1396 mode_char = *mode;
1397 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001398
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001399 case 'b':
1400 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001401
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001402 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001403#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001404 self->f_univ_newline = 0;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001405#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001406 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001407#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001408 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001409
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001410 default:
1411 error = 1;
1412 break;
1413 }
1414 if (error) {
1415 PyErr_Format(PyExc_ValueError,
1416 "invalid mode char %c", *mode);
1417 return -1;
1418 }
1419 mode++;
1420 if (*mode == '\0')
1421 break;
1422 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001423
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001424 if (mode_char == 0) {
1425 mode_char = 'r';
1426 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001427
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001428 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001429
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001430 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1431 name, mode, buffering);
1432 if (self->file == NULL)
1433 return -1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001434
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001435 /* From now on, we have stuff to dealloc, so jump to error label
1436 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001437
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001438#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001439 self->lock = PyThread_allocate_lock();
1440 if (!self->lock) {
1441 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1442 goto error;
1443 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001444#endif
1445
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001446 if (mode_char == 'r')
1447 self->fp = BZ2_bzReadOpen(&bzerror,
1448 PyFile_AsFile(self->file),
1449 0, 0, NULL, 0);
1450 else
1451 self->fp = BZ2_bzWriteOpen(&bzerror,
1452 PyFile_AsFile(self->file),
1453 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001454
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001455 if (bzerror != BZ_OK) {
1456 Util_CatchBZ2Error(bzerror);
1457 goto error;
1458 }
1459 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001460
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001461 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001462
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001463 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001464
1465error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001466 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001467#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001468 if (self->lock) {
1469 PyThread_free_lock(self->lock);
1470 self->lock = NULL;
1471 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001472#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001473 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001474}
1475
1476static void
1477BZ2File_dealloc(BZ2FileObject *self)
1478{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001479 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001480#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001481 if (self->lock)
1482 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001483#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001484 switch (self->mode) {
1485 case MODE_READ:
1486 case MODE_READ_EOF:
1487 BZ2_bzReadClose(&bzerror, self->fp);
1488 break;
1489 case MODE_WRITE:
1490 BZ2_bzWriteClose(&bzerror, self->fp,
1491 0, NULL, NULL);
1492 break;
1493 }
1494 if (self->fp) {
1495 PyFile_DecUseCount((PyFileObject *)self->file);
1496 self->fp = NULL;
1497 }
1498 Util_DropReadAhead(self);
1499 Py_XDECREF(self->file);
1500 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001501}
1502
1503/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1504static PyObject *
1505BZ2File_getiter(BZ2FileObject *self)
1506{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001507 if (self->mode == MODE_CLOSED) {
1508 PyErr_SetString(PyExc_ValueError,
1509 "I/O operation on closed file");
1510 return NULL;
1511 }
1512 Py_INCREF((PyObject*)self);
1513 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001514}
1515
1516/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1517#define READAHEAD_BUFSIZE 8192
1518static PyObject *
1519BZ2File_iternext(BZ2FileObject *self)
1520{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001521 PyStringObject* ret;
1522 ACQUIRE_LOCK(self);
1523 if (self->mode == MODE_CLOSED) {
1524 RELEASE_LOCK(self);
1525 PyErr_SetString(PyExc_ValueError,
1526 "I/O operation on closed file");
1527 return NULL;
1528 }
1529 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1530 RELEASE_LOCK(self);
1531 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1532 Py_XDECREF(ret);
1533 return NULL;
1534 }
1535 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001536}
1537
1538/* ===================================================================== */
1539/* BZ2File_Type definition. */
1540
1541PyDoc_VAR(BZ2File__doc__) =
1542PyDoc_STR(
1543"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1544\n\
1545Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1546writing. When opened for writing, the file will be created if it doesn't\n\
1547exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1548unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1549is given, must be a number between 1 and 9.\n\
1550")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001551PyDoc_STR(
1552"\n\
1553Add a 'U' to mode to open the file for input with universal newline\n\
1554support. Any line ending in the input file will be seen as a '\\n' in\n\
1555Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1556for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1557'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1558newlines are available only when reading.\n\
1559")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001560;
1561
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001562static PyTypeObject BZ2File_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001563 PyVarObject_HEAD_INIT(NULL, 0)
1564 "bz2.BZ2File", /*tp_name*/
1565 sizeof(BZ2FileObject), /*tp_basicsize*/
1566 0, /*tp_itemsize*/
1567 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1568 0, /*tp_print*/
1569 0, /*tp_getattr*/
1570 0, /*tp_setattr*/
1571 0, /*tp_compare*/
1572 0, /*tp_repr*/
1573 0, /*tp_as_number*/
1574 0, /*tp_as_sequence*/
1575 0, /*tp_as_mapping*/
1576 0, /*tp_hash*/
1577 0, /*tp_call*/
1578 0, /*tp_str*/
1579 PyObject_GenericGetAttr,/*tp_getattro*/
1580 PyObject_GenericSetAttr,/*tp_setattro*/
1581 0, /*tp_as_buffer*/
1582 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1583 BZ2File__doc__, /*tp_doc*/
1584 0, /*tp_traverse*/
1585 0, /*tp_clear*/
1586 0, /*tp_richcompare*/
1587 0, /*tp_weaklistoffset*/
1588 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1589 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1590 BZ2File_methods, /*tp_methods*/
1591 BZ2File_members, /*tp_members*/
1592 BZ2File_getset, /*tp_getset*/
1593 0, /*tp_base*/
1594 0, /*tp_dict*/
1595 0, /*tp_descr_get*/
1596 0, /*tp_descr_set*/
1597 0, /*tp_dictoffset*/
1598 (initproc)BZ2File_init, /*tp_init*/
1599 PyType_GenericAlloc, /*tp_alloc*/
1600 PyType_GenericNew, /*tp_new*/
1601 _PyObject_Del, /*tp_free*/
1602 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001603};
1604
1605
1606/* ===================================================================== */
1607/* Methods of BZ2Comp. */
1608
1609PyDoc_STRVAR(BZ2Comp_compress__doc__,
1610"compress(data) -> string\n\
1611\n\
1612Provide more data to the compressor object. It will return chunks of\n\
1613compressed data whenever possible. When you've finished providing data\n\
1614to compress, call the flush() method to finish the compression process,\n\
1615and return what is left in the internal buffers.\n\
1616");
1617
1618static PyObject *
1619BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1620{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001621 Py_buffer pdata;
1622 char *data;
1623 int datasize;
1624 int bufsize = SMALLCHUNK;
1625 PY_LONG_LONG totalout;
1626 PyObject *ret = NULL;
1627 bz_stream *bzs = &self->bzs;
1628 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001629
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001630 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1631 return NULL;
1632 data = pdata.buf;
1633 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001634
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001635 if (datasize == 0) {
1636 PyBuffer_Release(&pdata);
1637 return PyString_FromString("");
1638 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001639
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001640 ACQUIRE_LOCK(self);
1641 if (!self->running) {
1642 PyErr_SetString(PyExc_ValueError,
1643 "this object was already flushed");
1644 goto error;
1645 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001646
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001647 ret = PyString_FromStringAndSize(NULL, bufsize);
1648 if (!ret)
1649 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001650
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001651 bzs->next_in = data;
1652 bzs->avail_in = datasize;
1653 bzs->next_out = BUF(ret);
1654 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001655
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001656 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001657
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001658 for (;;) {
1659 Py_BEGIN_ALLOW_THREADS
1660 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1661 Py_END_ALLOW_THREADS
1662 if (bzerror != BZ_RUN_OK) {
1663 Util_CatchBZ2Error(bzerror);
1664 goto error;
1665 }
1666 if (bzs->avail_in == 0)
1667 break; /* no more input data */
1668 if (bzs->avail_out == 0) {
1669 bufsize = Util_NewBufferSize(bufsize);
1670 if (_PyString_Resize(&ret, bufsize) < 0) {
1671 BZ2_bzCompressEnd(bzs);
1672 goto error;
1673 }
1674 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1675 - totalout);
1676 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1677 }
1678 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001679
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001680 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001681
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001682 RELEASE_LOCK(self);
1683 PyBuffer_Release(&pdata);
1684 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001685
1686error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001687 RELEASE_LOCK(self);
1688 PyBuffer_Release(&pdata);
1689 Py_XDECREF(ret);
1690 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001691}
1692
1693PyDoc_STRVAR(BZ2Comp_flush__doc__,
1694"flush() -> string\n\
1695\n\
1696Finish the compression process and return what is left in internal buffers.\n\
1697You must not use the compressor object after calling this method.\n\
1698");
1699
1700static PyObject *
1701BZ2Comp_flush(BZ2CompObject *self)
1702{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001703 int bufsize = SMALLCHUNK;
1704 PyObject *ret = NULL;
1705 bz_stream *bzs = &self->bzs;
1706 PY_LONG_LONG totalout;
1707 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001708
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001709 ACQUIRE_LOCK(self);
1710 if (!self->running) {
1711 PyErr_SetString(PyExc_ValueError, "object was already "
1712 "flushed");
1713 goto error;
1714 }
1715 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001716
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001717 ret = PyString_FromStringAndSize(NULL, bufsize);
1718 if (!ret)
1719 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001720
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001721 bzs->next_out = BUF(ret);
1722 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001723
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001724 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001725
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001726 for (;;) {
1727 Py_BEGIN_ALLOW_THREADS
1728 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1729 Py_END_ALLOW_THREADS
1730 if (bzerror == BZ_STREAM_END) {
1731 break;
1732 } else if (bzerror != BZ_FINISH_OK) {
1733 Util_CatchBZ2Error(bzerror);
1734 goto error;
1735 }
1736 if (bzs->avail_out == 0) {
1737 bufsize = Util_NewBufferSize(bufsize);
1738 if (_PyString_Resize(&ret, bufsize) < 0)
1739 goto error;
1740 bzs->next_out = BUF(ret);
1741 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1742 - totalout);
1743 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1744 }
1745 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001746
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001747 if (bzs->avail_out != 0)
1748 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001749
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001750 RELEASE_LOCK(self);
1751 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001752
1753error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001754 RELEASE_LOCK(self);
1755 Py_XDECREF(ret);
1756 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001757}
1758
1759static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001760 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1761 BZ2Comp_compress__doc__},
1762 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1763 BZ2Comp_flush__doc__},
1764 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001765};
1766
1767
1768/* ===================================================================== */
1769/* Slot definitions for BZ2Comp_Type. */
1770
1771static int
1772BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1773{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001774 int compresslevel = 9;
1775 int bzerror;
1776 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001777
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001778 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1779 kwlist, &compresslevel))
1780 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001781
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001782 if (compresslevel < 1 || compresslevel > 9) {
1783 PyErr_SetString(PyExc_ValueError,
1784 "compresslevel must be between 1 and 9");
1785 goto error;
1786 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001787
1788#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001789 self->lock = PyThread_allocate_lock();
1790 if (!self->lock) {
1791 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1792 goto error;
1793 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001794#endif
1795
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001796 memset(&self->bzs, 0, sizeof(bz_stream));
1797 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1798 if (bzerror != BZ_OK) {
1799 Util_CatchBZ2Error(bzerror);
1800 goto error;
1801 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001802
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001803 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001804
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001805 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001806error:
1807#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001808 if (self->lock) {
1809 PyThread_free_lock(self->lock);
1810 self->lock = NULL;
1811 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001812#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001813 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001814}
1815
1816static void
1817BZ2Comp_dealloc(BZ2CompObject *self)
1818{
1819#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001820 if (self->lock)
1821 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001822#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001823 BZ2_bzCompressEnd(&self->bzs);
1824 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001825}
1826
1827
1828/* ===================================================================== */
1829/* BZ2Comp_Type definition. */
1830
1831PyDoc_STRVAR(BZ2Comp__doc__,
1832"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1833\n\
1834Create a new compressor object. This object may be used to compress\n\
1835data sequentially. If you want to compress data in one shot, use the\n\
1836compress() function instead. The compresslevel parameter, if given,\n\
1837must be a number between 1 and 9.\n\
1838");
1839
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001840static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001841 PyVarObject_HEAD_INIT(NULL, 0)
1842 "bz2.BZ2Compressor", /*tp_name*/
1843 sizeof(BZ2CompObject), /*tp_basicsize*/
1844 0, /*tp_itemsize*/
1845 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1846 0, /*tp_print*/
1847 0, /*tp_getattr*/
1848 0, /*tp_setattr*/
1849 0, /*tp_compare*/
1850 0, /*tp_repr*/
1851 0, /*tp_as_number*/
1852 0, /*tp_as_sequence*/
1853 0, /*tp_as_mapping*/
1854 0, /*tp_hash*/
1855 0, /*tp_call*/
1856 0, /*tp_str*/
1857 PyObject_GenericGetAttr,/*tp_getattro*/
1858 PyObject_GenericSetAttr,/*tp_setattro*/
1859 0, /*tp_as_buffer*/
1860 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1861 BZ2Comp__doc__, /*tp_doc*/
1862 0, /*tp_traverse*/
1863 0, /*tp_clear*/
1864 0, /*tp_richcompare*/
1865 0, /*tp_weaklistoffset*/
1866 0, /*tp_iter*/
1867 0, /*tp_iternext*/
1868 BZ2Comp_methods, /*tp_methods*/
1869 0, /*tp_members*/
1870 0, /*tp_getset*/
1871 0, /*tp_base*/
1872 0, /*tp_dict*/
1873 0, /*tp_descr_get*/
1874 0, /*tp_descr_set*/
1875 0, /*tp_dictoffset*/
1876 (initproc)BZ2Comp_init, /*tp_init*/
1877 PyType_GenericAlloc, /*tp_alloc*/
1878 PyType_GenericNew, /*tp_new*/
1879 _PyObject_Del, /*tp_free*/
1880 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001881};
1882
1883
1884/* ===================================================================== */
1885/* Members of BZ2Decomp. */
1886
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001887#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001888#define OFF(x) offsetof(BZ2DecompObject, x)
1889
1890static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001891 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1892 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001893};
1894
1895
1896/* ===================================================================== */
1897/* Methods of BZ2Decomp. */
1898
1899PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1900"decompress(data) -> string\n\
1901\n\
1902Provide more data to the decompressor object. It will return chunks\n\
1903of decompressed data whenever possible. If you try to decompress data\n\
1904after the end of stream is found, EOFError will be raised. If any data\n\
1905was found after the end of stream, it'll be ignored and saved in\n\
1906unused_data attribute.\n\
1907");
1908
1909static PyObject *
1910BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1911{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001912 Py_buffer pdata;
1913 char *data;
1914 int datasize;
1915 int bufsize = SMALLCHUNK;
1916 PY_LONG_LONG totalout;
1917 PyObject *ret = NULL;
1918 bz_stream *bzs = &self->bzs;
1919 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001920
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001921 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1922 return NULL;
1923 data = pdata.buf;
1924 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001925
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001926 ACQUIRE_LOCK(self);
1927 if (!self->running) {
1928 PyErr_SetString(PyExc_EOFError, "end of stream was "
1929 "already found");
1930 goto error;
1931 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001932
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001933 ret = PyString_FromStringAndSize(NULL, bufsize);
1934 if (!ret)
1935 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001936
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001937 bzs->next_in = data;
1938 bzs->avail_in = datasize;
1939 bzs->next_out = BUF(ret);
1940 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001941
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001942 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001943
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001944 for (;;) {
1945 Py_BEGIN_ALLOW_THREADS
1946 bzerror = BZ2_bzDecompress(bzs);
1947 Py_END_ALLOW_THREADS
1948 if (bzerror == BZ_STREAM_END) {
1949 if (bzs->avail_in != 0) {
1950 Py_DECREF(self->unused_data);
1951 self->unused_data =
1952 PyString_FromStringAndSize(bzs->next_in,
1953 bzs->avail_in);
1954 }
1955 self->running = 0;
1956 break;
1957 }
1958 if (bzerror != BZ_OK) {
1959 Util_CatchBZ2Error(bzerror);
1960 goto error;
1961 }
1962 if (bzs->avail_in == 0)
1963 break; /* no more input data */
1964 if (bzs->avail_out == 0) {
1965 bufsize = Util_NewBufferSize(bufsize);
1966 if (_PyString_Resize(&ret, bufsize) < 0) {
1967 BZ2_bzDecompressEnd(bzs);
1968 goto error;
1969 }
1970 bzs->next_out = BUF(ret);
1971 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1972 - totalout);
1973 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1974 }
1975 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001976
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001977 if (bzs->avail_out != 0)
1978 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001979
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001980 RELEASE_LOCK(self);
1981 PyBuffer_Release(&pdata);
1982 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001983
1984error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001985 RELEASE_LOCK(self);
1986 PyBuffer_Release(&pdata);
1987 Py_XDECREF(ret);
1988 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001989}
1990
1991static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001992 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1993 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001994};
1995
1996
1997/* ===================================================================== */
1998/* Slot definitions for BZ2Decomp_Type. */
1999
2000static int
2001BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2002{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002003 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002004
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002005 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2006 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002007
2008#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002009 self->lock = PyThread_allocate_lock();
2010 if (!self->lock) {
2011 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2012 goto error;
2013 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002014#endif
2015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002016 self->unused_data = PyString_FromString("");
2017 if (!self->unused_data)
2018 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002020 memset(&self->bzs, 0, sizeof(bz_stream));
2021 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2022 if (bzerror != BZ_OK) {
2023 Util_CatchBZ2Error(bzerror);
2024 goto error;
2025 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002026
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002027 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002028
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002029 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002030
2031error:
2032#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002033 if (self->lock) {
2034 PyThread_free_lock(self->lock);
2035 self->lock = NULL;
2036 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002037#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002038 Py_CLEAR(self->unused_data);
2039 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002040}
2041
2042static void
2043BZ2Decomp_dealloc(BZ2DecompObject *self)
2044{
2045#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002046 if (self->lock)
2047 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002048#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002049 Py_XDECREF(self->unused_data);
2050 BZ2_bzDecompressEnd(&self->bzs);
2051 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002052}
2053
2054
2055/* ===================================================================== */
2056/* BZ2Decomp_Type definition. */
2057
2058PyDoc_STRVAR(BZ2Decomp__doc__,
2059"BZ2Decompressor() -> decompressor object\n\
2060\n\
2061Create a new decompressor object. This object may be used to decompress\n\
2062data sequentially. If you want to decompress data in one shot, use the\n\
2063decompress() function instead.\n\
2064");
2065
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00002066static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002067 PyVarObject_HEAD_INIT(NULL, 0)
2068 "bz2.BZ2Decompressor", /*tp_name*/
2069 sizeof(BZ2DecompObject), /*tp_basicsize*/
2070 0, /*tp_itemsize*/
2071 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2072 0, /*tp_print*/
2073 0, /*tp_getattr*/
2074 0, /*tp_setattr*/
2075 0, /*tp_compare*/
2076 0, /*tp_repr*/
2077 0, /*tp_as_number*/
2078 0, /*tp_as_sequence*/
2079 0, /*tp_as_mapping*/
2080 0, /*tp_hash*/
2081 0, /*tp_call*/
2082 0, /*tp_str*/
2083 PyObject_GenericGetAttr,/*tp_getattro*/
2084 PyObject_GenericSetAttr,/*tp_setattro*/
2085 0, /*tp_as_buffer*/
2086 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2087 BZ2Decomp__doc__, /*tp_doc*/
2088 0, /*tp_traverse*/
2089 0, /*tp_clear*/
2090 0, /*tp_richcompare*/
2091 0, /*tp_weaklistoffset*/
2092 0, /*tp_iter*/
2093 0, /*tp_iternext*/
2094 BZ2Decomp_methods, /*tp_methods*/
2095 BZ2Decomp_members, /*tp_members*/
2096 0, /*tp_getset*/
2097 0, /*tp_base*/
2098 0, /*tp_dict*/
2099 0, /*tp_descr_get*/
2100 0, /*tp_descr_set*/
2101 0, /*tp_dictoffset*/
2102 (initproc)BZ2Decomp_init, /*tp_init*/
2103 PyType_GenericAlloc, /*tp_alloc*/
2104 PyType_GenericNew, /*tp_new*/
2105 _PyObject_Del, /*tp_free*/
2106 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002107};
2108
2109
2110/* ===================================================================== */
2111/* Module functions. */
2112
2113PyDoc_STRVAR(bz2_compress__doc__,
2114"compress(data [, compresslevel=9]) -> string\n\
2115\n\
2116Compress data in one shot. If you want to compress data sequentially,\n\
2117use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2118given, must be a number between 1 and 9.\n\
2119");
2120
2121static PyObject *
2122bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2123{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002124 int compresslevel=9;
2125 Py_buffer pdata;
2126 char *data;
2127 int datasize;
2128 int bufsize;
2129 PyObject *ret = NULL;
2130 bz_stream _bzs;
2131 bz_stream *bzs = &_bzs;
2132 int bzerror;
2133 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002134
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002135 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2136 kwlist, &pdata,
2137 &compresslevel))
2138 return NULL;
2139 data = pdata.buf;
2140 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002141
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002142 if (compresslevel < 1 || compresslevel > 9) {
2143 PyErr_SetString(PyExc_ValueError,
2144 "compresslevel must be between 1 and 9");
2145 PyBuffer_Release(&pdata);
2146 return NULL;
2147 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002148
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002149 /* Conforming to bz2 manual, this is large enough to fit compressed
2150 * data in one shot. We will check it later anyway. */
2151 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002152
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002153 ret = PyString_FromStringAndSize(NULL, bufsize);
2154 if (!ret) {
2155 PyBuffer_Release(&pdata);
2156 return NULL;
2157 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002158
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002159 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002160
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002161 bzs->next_in = data;
2162 bzs->avail_in = datasize;
2163 bzs->next_out = BUF(ret);
2164 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002165
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002166 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2167 if (bzerror != BZ_OK) {
2168 Util_CatchBZ2Error(bzerror);
2169 PyBuffer_Release(&pdata);
2170 Py_DECREF(ret);
2171 return NULL;
2172 }
Tim Peterse3228092002-11-09 04:21:44 +00002173
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002174 for (;;) {
2175 Py_BEGIN_ALLOW_THREADS
2176 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2177 Py_END_ALLOW_THREADS
2178 if (bzerror == BZ_STREAM_END) {
2179 break;
2180 } else if (bzerror != BZ_FINISH_OK) {
2181 BZ2_bzCompressEnd(bzs);
2182 Util_CatchBZ2Error(bzerror);
2183 PyBuffer_Release(&pdata);
2184 Py_DECREF(ret);
2185 return NULL;
2186 }
2187 if (bzs->avail_out == 0) {
2188 bufsize = Util_NewBufferSize(bufsize);
2189 if (_PyString_Resize(&ret, bufsize) < 0) {
2190 BZ2_bzCompressEnd(bzs);
2191 PyBuffer_Release(&pdata);
2192 Py_DECREF(ret);
2193 return NULL;
2194 }
2195 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2196 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2197 }
2198 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002199
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002200 if (bzs->avail_out != 0)
2201 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2202 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002203
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002204 PyBuffer_Release(&pdata);
2205 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002206}
2207
2208PyDoc_STRVAR(bz2_decompress__doc__,
2209"decompress(data) -> decompressed data\n\
2210\n\
2211Decompress data in one shot. If you want to decompress data sequentially,\n\
2212use an instance of BZ2Decompressor instead.\n\
2213");
2214
2215static PyObject *
2216bz2_decompress(PyObject *self, PyObject *args)
2217{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002218 Py_buffer pdata;
2219 char *data;
2220 int datasize;
2221 int bufsize = SMALLCHUNK;
2222 PyObject *ret;
2223 bz_stream _bzs;
2224 bz_stream *bzs = &_bzs;
2225 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002226
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002227 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2228 return NULL;
2229 data = pdata.buf;
2230 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002231
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002232 if (datasize == 0) {
2233 PyBuffer_Release(&pdata);
2234 return PyString_FromString("");
2235 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002236
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002237 ret = PyString_FromStringAndSize(NULL, bufsize);
2238 if (!ret) {
2239 PyBuffer_Release(&pdata);
2240 return NULL;
2241 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002242
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002243 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002244
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002245 bzs->next_in = data;
2246 bzs->avail_in = datasize;
2247 bzs->next_out = BUF(ret);
2248 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002249
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002250 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2251 if (bzerror != BZ_OK) {
2252 Util_CatchBZ2Error(bzerror);
2253 Py_DECREF(ret);
2254 PyBuffer_Release(&pdata);
2255 return NULL;
2256 }
Tim Peterse3228092002-11-09 04:21:44 +00002257
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002258 for (;;) {
2259 Py_BEGIN_ALLOW_THREADS
2260 bzerror = BZ2_bzDecompress(bzs);
2261 Py_END_ALLOW_THREADS
2262 if (bzerror == BZ_STREAM_END) {
2263 break;
2264 } else if (bzerror != BZ_OK) {
2265 BZ2_bzDecompressEnd(bzs);
2266 Util_CatchBZ2Error(bzerror);
2267 PyBuffer_Release(&pdata);
2268 Py_DECREF(ret);
2269 return NULL;
2270 }
2271 if (bzs->avail_in == 0) {
2272 BZ2_bzDecompressEnd(bzs);
2273 PyErr_SetString(PyExc_ValueError,
2274 "couldn't find end of stream");
2275 PyBuffer_Release(&pdata);
2276 Py_DECREF(ret);
2277 return NULL;
2278 }
2279 if (bzs->avail_out == 0) {
2280 bufsize = Util_NewBufferSize(bufsize);
2281 if (_PyString_Resize(&ret, bufsize) < 0) {
2282 BZ2_bzDecompressEnd(bzs);
2283 PyBuffer_Release(&pdata);
2284 Py_DECREF(ret);
2285 return NULL;
2286 }
2287 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2288 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2289 }
2290 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002291
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002292 if (bzs->avail_out != 0)
2293 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2294 BZ2_bzDecompressEnd(bzs);
2295 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002296
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002297 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002298}
2299
2300static PyMethodDef bz2_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002301 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2302 bz2_compress__doc__},
2303 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2304 bz2_decompress__doc__},
2305 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002306};
2307
2308/* ===================================================================== */
2309/* Initialization function. */
2310
2311PyDoc_STRVAR(bz2__doc__,
2312"The python bz2 module provides a comprehensive interface for\n\
2313the bz2 compression library. It implements a complete file\n\
2314interface, one shot (de)compression functions, and types for\n\
2315sequential (de)compression.\n\
2316");
2317
Neal Norwitz21d896c2003-07-01 20:15:21 +00002318PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002319initbz2(void)
2320{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002321 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002322
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002323 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2324 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2325 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002326
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002327 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2328 if (m == NULL)
2329 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002330
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002331 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002332
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002333 Py_INCREF(&BZ2File_Type);
2334 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002335
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002336 Py_INCREF(&BZ2Comp_Type);
2337 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002338
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002339 Py_INCREF(&BZ2Decomp_Type);
2340 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002341}