/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
#include "Python.h"
#include <stdio.h>
#include <bzlib.h>
#include "structmember.h"

#ifdef WITH_THREAD
#include "pythread.h"
#endif
18
/* Author credit string for the module.
 * NOTE(review): presumably exported by the module init code, which is
 * outside this view -- confirm before renaming. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
    Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/* Our very own off_t-like type, 64-bit if possible */
/* copied from Objects/fileobject.c */
/* Selection order: without large file support plain off_t is used; with
 * it, off_t is preferred when it is at least 8 bytes wide, then fpos_t;
 * if neither is wide enough the build is aborted. */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character buffer of a Python string object; the argument is cast to
 * PyStringObject * without any type check. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type test against BZ2File_Type (pointer comparison only). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* Compatibility layer keyed on whether <bzlib.h> defines BZ_CONFIG_ERROR.
 *
 * When it does, the stream exposes its output byte count as two 32-bit
 * halves (total_out_hi32 / total_out_lo32); BZS_TOTAL_OUT() combines them
 * using the widest integer type that is at least 8 bytes, or falls back
 * to the low half alone.
 *
 * When it does not, the BZ2_-prefixed entry points are mapped onto the
 * unprefixed names and the count is read from a single total_out field.
 * NOTE(review): presumably this targets bzlib releases that predate the
 * BZ2_ prefix -- confirm against the bzlib versions still supported.
 *
 * The macro argument is parenthesized so that expressions such as
 * BZS_TOTAL_OUT(&stream) expand correctly. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers.
 *
 * ACQUIRE_LOCK first tries a non-blocking acquire; only if that fails does
 * it release the interpreter lock (Py_BEGIN_ALLOW_THREADS) and block until
 * the object lock becomes available.  Without WITH_THREAD both macros
 * expand to nothing.  `obj` must be a pointer to a struct that has a
 * `lock` member. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bits in f_newlinetypes */
/* The non-zero values are distinct bits so they can be OR-ed together. */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000103 PyObject_HEAD
104 PyObject *file;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000110 int f_softspace; /* Flag used by 'print' command */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000111
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000112 int f_univ_newline; /* Handle any newline convention */
113 int f_newlinetypes; /* Types of newlines seen */
114 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000115
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000116 BZFILE *fp;
117 int mode;
118 Py_off_t pos;
119 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000120#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000121 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000122#endif
123} BZ2FileObject;
124
125typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000126 PyObject_HEAD
127 bz_stream bzs;
128 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000129#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000130 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000131#endif
132} BZ2CompObject;
133
134typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 PyObject_HEAD
136 bz_stream bzs;
137 int running;
138 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000139#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000140 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000141#endif
142} BZ2DecompObject;
143
144/* ===================================================================== */
145/* Utility functions. */
146
Antoine Pitrou39703012010-08-01 20:13:11 +0000147/* Refuse regular I/O if there's data in the iteration-buffer.
148 * Mixing them would cause data to arrive out of order, as the read*
149 * methods don't use the iteration buffer. */
150static int
151check_iterbuffered(BZ2FileObject *f)
152{
153 if (f->f_buf != NULL &&
154 (f->f_bufend - f->f_bufptr) > 0 &&
155 f->f_buf[0] != '\0') {
156 PyErr_SetString(PyExc_ValueError,
157 "Mixing iteration and read methods would lose data");
158 return -1;
159 }
160 return 0;
161}
162
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000163static int
164Util_CatchBZ2Error(int bzerror)
165{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000166 int ret = 0;
167 switch(bzerror) {
168 case BZ_OK:
169 case BZ_STREAM_END:
170 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000171
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000172#ifdef BZ_CONFIG_ERROR
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000173 case BZ_CONFIG_ERROR:
174 PyErr_SetString(PyExc_SystemError,
175 "the bz2 library was not compiled "
176 "correctly");
177 ret = 1;
178 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000179#endif
Tim Peterse3228092002-11-09 04:21:44 +0000180
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000181 case BZ_PARAM_ERROR:
182 PyErr_SetString(PyExc_ValueError,
183 "the bz2 library has received wrong "
184 "parameters");
185 ret = 1;
186 break;
Tim Peterse3228092002-11-09 04:21:44 +0000187
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000188 case BZ_MEM_ERROR:
189 PyErr_NoMemory();
190 ret = 1;
191 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000192
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000193 case BZ_DATA_ERROR:
194 case BZ_DATA_ERROR_MAGIC:
195 PyErr_SetString(PyExc_IOError, "invalid data stream");
196 ret = 1;
197 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000198
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000199 case BZ_IO_ERROR:
200 PyErr_SetString(PyExc_IOError, "unknown IO error");
201 ret = 1;
202 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000203
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000204 case BZ_UNEXPECTED_EOF:
205 PyErr_SetString(PyExc_EOFError,
206 "compressed file ended before the "
207 "logical end-of-stream was detected");
208 ret = 1;
209 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000210
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000211 case BZ_SEQUENCE_ERROR:
212 PyErr_SetString(PyExc_RuntimeError,
213 "wrong sequence of bz2 library "
214 "commands used");
215 ret = 1;
216 break;
217 }
218 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000219}
220
/* Size of the on-stack buffer used by readlines(): BUFSIZ, but never
 * smaller than 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif
226
/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Returns the next buffer size to use when `currentsize` bytes were not
 * enough: currentsize + currentsize/8 + 6.  If that sum would not fit in
 * a size_t the result saturates at the largest size_t value instead of
 * wrapping around to a small number. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    /* Expand the buffer by an amount proportional to the current size,
       giving us amortized linear-time behavior. Use a less-than-double
       growth factor to avoid excessive allocation. */
    const size_t max_size = (size_t)-1;
    size_t growth = (currentsize >> 3) + 6;

    if (currentsize > max_size - growth)
        return max_size;
    return currentsize + growth;
}
236
237/* This is a hacked version of Python's fileobject.c:get_line(). */
238static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000239Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000240{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000241 char c;
242 char *buf, *end;
243 size_t total_v_size; /* total # of slots in buffer */
244 size_t used_v_size; /* # used slots in buffer */
245 size_t increment; /* amount to increment the buffer */
246 PyObject *v;
247 int bzerror;
248 int bytes_read;
249 int newlinetypes = f->f_newlinetypes;
250 int skipnextlf = f->f_skipnextlf;
251 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000253 total_v_size = n > 0 ? n : 100;
254 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
255 if (v == NULL)
256 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000257
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000258 buf = BUF(v);
259 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000260
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000261 for (;;) {
262 Py_BEGIN_ALLOW_THREADS
263 while (buf != end) {
264 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
265 f->pos++;
266 if (bytes_read == 0) break;
267 if (univ_newline) {
268 if (skipnextlf) {
269 skipnextlf = 0;
270 if (c == '\n') {
271 /* Seeing a \n here with skipnextlf true means we
272 * saw a \r before.
273 */
274 newlinetypes |= NEWLINE_CRLF;
275 if (bzerror != BZ_OK) break;
276 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
277 f->pos++;
278 if (bytes_read == 0) break;
279 } else {
280 newlinetypes |= NEWLINE_CR;
281 }
282 }
283 if (c == '\r') {
284 skipnextlf = 1;
285 c = '\n';
286 } else if (c == '\n')
287 newlinetypes |= NEWLINE_LF;
288 }
289 *buf++ = c;
290 if (bzerror != BZ_OK || c == '\n') break;
291 }
292 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
293 newlinetypes |= NEWLINE_CR;
294 Py_END_ALLOW_THREADS
295 f->f_newlinetypes = newlinetypes;
296 f->f_skipnextlf = skipnextlf;
297 if (bzerror == BZ_STREAM_END) {
298 f->size = f->pos;
299 f->mode = MODE_READ_EOF;
300 break;
301 } else if (bzerror != BZ_OK) {
302 Util_CatchBZ2Error(bzerror);
303 Py_DECREF(v);
304 return NULL;
305 }
306 if (c == '\n')
307 break;
308 /* Must be because buf == end */
309 if (n > 0)
310 break;
311 used_v_size = total_v_size;
312 increment = total_v_size >> 2; /* mild exponential growth */
313 total_v_size += increment;
314 if (total_v_size > INT_MAX) {
315 PyErr_SetString(PyExc_OverflowError,
316 "line is longer than a Python string can hold");
317 Py_DECREF(v);
318 return NULL;
319 }
320 if (_PyString_Resize(&v, total_v_size) < 0)
321 return NULL;
322 buf = BUF(v) + used_v_size;
323 end = BUF(v) + total_v_size;
324 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000325
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000326 used_v_size = buf - BUF(v);
327 if (used_v_size != total_v_size)
328 _PyString_Resize(&v, used_v_size);
329 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000330}
331
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000332/* This is a hacked version of Python's
333 * fileobject.c:Py_UniversalNewlineFread(). */
334size_t
335Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000336 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000337{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000338 char *dst = buf;
339 int newlinetypes, skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000340
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 assert(buf != NULL);
342 assert(stream != NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000343
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000344 if (!f->f_univ_newline)
345 return BZ2_bzRead(bzerror, stream, buf, n);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000346
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000347 newlinetypes = f->f_newlinetypes;
348 skipnextlf = f->f_skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000349
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000350 /* Invariant: n is the number of bytes remaining to be filled
351 * in the buffer.
352 */
353 while (n) {
354 size_t nread;
355 int shortread;
356 char *src = dst;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000357
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000358 nread = BZ2_bzRead(bzerror, stream, dst, n);
359 assert(nread <= n);
360 n -= nread; /* assuming 1 byte out for each in; will adjust */
361 shortread = n != 0; /* true iff EOF or error */
362 while (nread--) {
363 char c = *src++;
364 if (c == '\r') {
365 /* Save as LF and set flag to skip next LF. */
366 *dst++ = '\n';
367 skipnextlf = 1;
368 }
369 else if (skipnextlf && c == '\n') {
370 /* Skip LF, and remember we saw CR LF. */
371 skipnextlf = 0;
372 newlinetypes |= NEWLINE_CRLF;
373 ++n;
374 }
375 else {
376 /* Normal char to be stored in buffer. Also
377 * update the newlinetypes flag if either this
378 * is an LF or the previous char was a CR.
379 */
380 if (c == '\n')
381 newlinetypes |= NEWLINE_LF;
382 else if (skipnextlf)
383 newlinetypes |= NEWLINE_CR;
384 *dst++ = c;
385 skipnextlf = 0;
386 }
387 }
388 if (shortread) {
389 /* If this is EOF, update type flags. */
390 if (skipnextlf && *bzerror == BZ_STREAM_END)
391 newlinetypes |= NEWLINE_CR;
392 break;
393 }
394 }
395 f->f_newlinetypes = newlinetypes;
396 f->f_skipnextlf = skipnextlf;
397 return dst - buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000398}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399
400/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
401static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000402Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000404 if (f->f_buf != NULL) {
405 PyMem_Free(f->f_buf);
406 f->f_buf = NULL;
407 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000408}
409
410/* This is a hacked version of Python's fileobject.c:readahead(). */
411static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000412Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000413{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000414 int chunksize;
415 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000416
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 if (f->f_buf != NULL) {
418 if((f->f_bufend - f->f_bufptr) >= 1)
419 return 0;
420 else
421 Util_DropReadAhead(f);
422 }
423 if (f->mode == MODE_READ_EOF) {
424 f->f_bufptr = f->f_buf;
425 f->f_bufend = f->f_buf;
426 return 0;
427 }
428 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
429 PyErr_NoMemory();
430 return -1;
431 }
432 Py_BEGIN_ALLOW_THREADS
433 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
434 bufsize, f);
435 Py_END_ALLOW_THREADS
436 f->pos += chunksize;
437 if (bzerror == BZ_STREAM_END) {
438 f->size = f->pos;
439 f->mode = MODE_READ_EOF;
440 } else if (bzerror != BZ_OK) {
441 Util_CatchBZ2Error(bzerror);
442 Util_DropReadAhead(f);
443 return -1;
444 }
445 f->f_bufptr = f->f_buf;
446 f->f_bufend = f->f_buf + chunksize;
447 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000448}
449
450/* This is a hacked version of Python's
451 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000452static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000453Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000454{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000455 PyStringObject* s;
456 char *bufptr;
457 char *buf;
458 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000459
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000460 if (f->f_buf == NULL)
461 if (Util_ReadAhead(f, bufsize) < 0)
462 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000463
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000464 len = f->f_bufend - f->f_bufptr;
465 if (len == 0)
466 return (PyStringObject *)
467 PyString_FromStringAndSize(NULL, skip);
468 bufptr = memchr(f->f_bufptr, '\n', len);
469 if (bufptr != NULL) {
470 bufptr++; /* Count the '\n' */
471 len = bufptr - f->f_bufptr;
472 s = (PyStringObject *)
473 PyString_FromStringAndSize(NULL, skip+len);
474 if (s == NULL)
475 return NULL;
476 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
477 f->f_bufptr = bufptr;
478 if (bufptr == f->f_bufend)
479 Util_DropReadAhead(f);
480 } else {
481 bufptr = f->f_bufptr;
482 buf = f->f_buf;
483 f->f_buf = NULL; /* Force new readahead buffer */
484 s = Util_ReadAheadGetLineSkip(f, skip+len,
485 bufsize + (bufsize>>2));
486 if (s == NULL) {
487 PyMem_Free(buf);
488 return NULL;
489 }
490 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
491 PyMem_Free(buf);
492 }
493 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000494}
495
496/* ===================================================================== */
497/* Methods of BZ2File. */
498
499PyDoc_STRVAR(BZ2File_read__doc__,
500"read([size]) -> string\n\
501\n\
502Read at most size uncompressed bytes, returned as a string. If the size\n\
503argument is negative or omitted, read until EOF is reached.\n\
504");
505
506/* This is a hacked version of Python's fileobject.c:file_read(). */
507static PyObject *
508BZ2File_read(BZ2FileObject *self, PyObject *args)
509{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000510 long bytesrequested = -1;
511 size_t bytesread, buffersize, chunksize;
512 int bzerror;
513 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000514
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000515 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
516 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000517
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000518 ACQUIRE_LOCK(self);
519 switch (self->mode) {
520 case MODE_READ:
521 break;
522 case MODE_READ_EOF:
523 ret = PyString_FromString("");
524 goto cleanup;
525 case MODE_CLOSED:
526 PyErr_SetString(PyExc_ValueError,
527 "I/O operation on closed file");
528 goto cleanup;
529 default:
530 PyErr_SetString(PyExc_IOError,
531 "file is not ready for reading");
532 goto cleanup;
533 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000534
Antoine Pitrou39703012010-08-01 20:13:11 +0000535 /* refuse to mix with f.next() */
536 if (check_iterbuffered(self))
537 goto cleanup;
538
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000539 if (bytesrequested < 0)
540 buffersize = Util_NewBufferSize((size_t)0);
541 else
542 buffersize = bytesrequested;
543 if (buffersize > INT_MAX) {
544 PyErr_SetString(PyExc_OverflowError,
545 "requested number of bytes is "
546 "more than a Python string can hold");
547 goto cleanup;
548 }
549 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
550 if (ret == NULL)
551 goto cleanup;
552 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000553
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000554 for (;;) {
555 Py_BEGIN_ALLOW_THREADS
556 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
557 BUF(ret)+bytesread,
558 buffersize-bytesread,
559 self);
560 self->pos += chunksize;
561 Py_END_ALLOW_THREADS
562 bytesread += chunksize;
563 if (bzerror == BZ_STREAM_END) {
564 self->size = self->pos;
565 self->mode = MODE_READ_EOF;
566 break;
567 } else if (bzerror != BZ_OK) {
568 Util_CatchBZ2Error(bzerror);
569 Py_DECREF(ret);
570 ret = NULL;
571 goto cleanup;
572 }
573 if (bytesrequested < 0) {
574 buffersize = Util_NewBufferSize(buffersize);
575 if (_PyString_Resize(&ret, buffersize) < 0)
576 goto cleanup;
577 } else {
578 break;
579 }
580 }
581 if (bytesread != buffersize)
582 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000583
584cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000585 RELEASE_LOCK(self);
586 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000587}
588
589PyDoc_STRVAR(BZ2File_readline__doc__,
590"readline([size]) -> string\n\
591\n\
592Return the next line from the file, as a string, retaining newline.\n\
593A non-negative size argument will limit the maximum number of bytes to\n\
594return (an incomplete line may be returned then). Return an empty\n\
595string at EOF.\n\
596");
597
598static PyObject *
599BZ2File_readline(BZ2FileObject *self, PyObject *args)
600{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000601 PyObject *ret = NULL;
602 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000603
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000604 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
605 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000606
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000607 ACQUIRE_LOCK(self);
608 switch (self->mode) {
609 case MODE_READ:
610 break;
611 case MODE_READ_EOF:
612 ret = PyString_FromString("");
613 goto cleanup;
614 case MODE_CLOSED:
615 PyErr_SetString(PyExc_ValueError,
616 "I/O operation on closed file");
617 goto cleanup;
618 default:
619 PyErr_SetString(PyExc_IOError,
620 "file is not ready for reading");
621 goto cleanup;
622 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000623
Antoine Pitrou39703012010-08-01 20:13:11 +0000624 /* refuse to mix with f.next() */
625 if (check_iterbuffered(self))
626 goto cleanup;
627
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000628 if (sizehint == 0)
629 ret = PyString_FromString("");
630 else
631 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000632
633cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000634 RELEASE_LOCK(self);
635 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000636}
637
638PyDoc_STRVAR(BZ2File_readlines__doc__,
639"readlines([size]) -> list\n\
640\n\
641Call readline() repeatedly and return a list of lines read.\n\
642The optional size argument, if given, is an approximate bound on the\n\
643total number of bytes in the lines returned.\n\
644");
645
646/* This is a hacked version of Python's fileobject.c:file_readlines(). */
647static PyObject *
648BZ2File_readlines(BZ2FileObject *self, PyObject *args)
649{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000650 long sizehint = 0;
651 PyObject *list = NULL;
652 PyObject *line;
653 char small_buffer[SMALLCHUNK];
654 char *buffer = small_buffer;
655 size_t buffersize = SMALLCHUNK;
656 PyObject *big_buffer = NULL;
657 size_t nfilled = 0;
658 size_t nread;
659 size_t totalread = 0;
660 char *p, *q, *end;
661 int err;
662 int shortread = 0;
663 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000664
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000665 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
666 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000667
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000668 ACQUIRE_LOCK(self);
669 switch (self->mode) {
670 case MODE_READ:
671 break;
672 case MODE_READ_EOF:
673 list = PyList_New(0);
674 goto cleanup;
675 case MODE_CLOSED:
676 PyErr_SetString(PyExc_ValueError,
677 "I/O operation on closed file");
678 goto cleanup;
679 default:
680 PyErr_SetString(PyExc_IOError,
681 "file is not ready for reading");
682 goto cleanup;
683 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000684
Antoine Pitrou39703012010-08-01 20:13:11 +0000685 /* refuse to mix with f.next() */
686 if (check_iterbuffered(self))
687 goto cleanup;
688
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000689 if ((list = PyList_New(0)) == NULL)
690 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000691
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000692 for (;;) {
693 Py_BEGIN_ALLOW_THREADS
694 nread = Util_UnivNewlineRead(&bzerror, self->fp,
695 buffer+nfilled,
696 buffersize-nfilled, self);
697 self->pos += nread;
698 Py_END_ALLOW_THREADS
699 if (bzerror == BZ_STREAM_END) {
700 self->size = self->pos;
701 self->mode = MODE_READ_EOF;
702 if (nread == 0) {
703 sizehint = 0;
704 break;
705 }
706 shortread = 1;
707 } else if (bzerror != BZ_OK) {
708 Util_CatchBZ2Error(bzerror);
709 error:
710 Py_DECREF(list);
711 list = NULL;
712 goto cleanup;
713 }
714 totalread += nread;
715 p = memchr(buffer+nfilled, '\n', nread);
716 if (!shortread && p == NULL) {
717 /* Need a larger buffer to fit this line */
718 nfilled += nread;
719 buffersize *= 2;
720 if (buffersize > INT_MAX) {
721 PyErr_SetString(PyExc_OverflowError,
722 "line is longer than a Python string can hold");
723 goto error;
724 }
725 if (big_buffer == NULL) {
726 /* Create the big buffer */
727 big_buffer = PyString_FromStringAndSize(
728 NULL, buffersize);
729 if (big_buffer == NULL)
730 goto error;
731 buffer = PyString_AS_STRING(big_buffer);
732 memcpy(buffer, small_buffer, nfilled);
733 }
734 else {
735 /* Grow the big buffer */
736 _PyString_Resize(&big_buffer, buffersize);
737 buffer = PyString_AS_STRING(big_buffer);
738 }
739 continue;
740 }
741 end = buffer+nfilled+nread;
742 q = buffer;
743 while (p != NULL) {
744 /* Process complete lines */
745 p++;
746 line = PyString_FromStringAndSize(q, p-q);
747 if (line == NULL)
748 goto error;
749 err = PyList_Append(list, line);
750 Py_DECREF(line);
751 if (err != 0)
752 goto error;
753 q = p;
754 p = memchr(q, '\n', end-q);
755 }
756 /* Move the remaining incomplete line to the start */
757 nfilled = end-q;
758 memmove(buffer, q, nfilled);
759 if (sizehint > 0)
760 if (totalread >= (size_t)sizehint)
761 break;
762 if (shortread) {
763 sizehint = 0;
764 break;
765 }
766 }
767 if (nfilled != 0) {
768 /* Partial last line */
769 line = PyString_FromStringAndSize(buffer, nfilled);
770 if (line == NULL)
771 goto error;
772 if (sizehint > 0) {
773 /* Need to complete the last line */
774 PyObject *rest = Util_GetLine(self, 0);
775 if (rest == NULL) {
776 Py_DECREF(line);
777 goto error;
778 }
779 PyString_Concat(&line, rest);
780 Py_DECREF(rest);
781 if (line == NULL)
782 goto error;
783 }
784 err = PyList_Append(list, line);
785 Py_DECREF(line);
786 if (err != 0)
787 goto error;
788 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000789
790 cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000791 RELEASE_LOCK(self);
792 if (big_buffer) {
793 Py_DECREF(big_buffer);
794 }
795 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000796}
797
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000798PyDoc_STRVAR(BZ2File_xreadlines__doc__,
799"xreadlines() -> self\n\
800\n\
801For backward compatibility. BZ2File objects now include the performance\n\
802optimizations previously implemented in the xreadlines module.\n\
803");
804
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805PyDoc_STRVAR(BZ2File_write__doc__,
806"write(data) -> None\n\
807\n\
808Write the 'data' string to file. Note that due to buffering, close() may\n\
809be needed before the file on disk reflects the data written.\n\
810");
811
812/* This is a hacked version of Python's fileobject.c:file_write(). */
813static PyObject *
814BZ2File_write(BZ2FileObject *self, PyObject *args)
815{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000816 PyObject *ret = NULL;
817 Py_buffer pbuf;
818 char *buf;
819 int len;
820 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000821
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000822 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
823 return NULL;
824 buf = pbuf.buf;
825 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000826
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000827 ACQUIRE_LOCK(self);
828 switch (self->mode) {
829 case MODE_WRITE:
830 break;
Tim Peterse3228092002-11-09 04:21:44 +0000831
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000832 case MODE_CLOSED:
833 PyErr_SetString(PyExc_ValueError,
834 "I/O operation on closed file");
835 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000836
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000837 default:
838 PyErr_SetString(PyExc_IOError,
839 "file is not ready for writing");
840 goto cleanup;
841 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000842
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000843 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000844
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000845 Py_BEGIN_ALLOW_THREADS
846 BZ2_bzWrite (&bzerror, self->fp, buf, len);
847 self->pos += len;
848 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000849
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000850 if (bzerror != BZ_OK) {
851 Util_CatchBZ2Error(bzerror);
852 goto cleanup;
853 }
Tim Peterse3228092002-11-09 04:21:44 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 Py_INCREF(Py_None);
856 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000857
858cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000859 PyBuffer_Release(&pbuf);
860 RELEASE_LOCK(self);
861 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000862}
863
864PyDoc_STRVAR(BZ2File_writelines__doc__,
865"writelines(sequence_of_strings) -> None\n\
866\n\
867Write the sequence of strings to the file. Note that newlines are not\n\
868added. The sequence can be any iterable object producing strings. This is\n\
869equivalent to calling write() for each string.\n\
870");
871
872/* This is a hacked version of Python's fileobject.c:file_writelines(). */
873static PyObject *
874BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
875{
876#define CHUNKSIZE 1000
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000877 PyObject *list = NULL;
878 PyObject *iter = NULL;
879 PyObject *ret = NULL;
880 PyObject *line;
881 int i, j, index, len, islist;
882 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000883
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000884 ACQUIRE_LOCK(self);
885 switch (self->mode) {
886 case MODE_WRITE:
887 break;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000888
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 case MODE_CLOSED:
890 PyErr_SetString(PyExc_ValueError,
891 "I/O operation on closed file");
892 goto error;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000893
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000894 default:
895 PyErr_SetString(PyExc_IOError,
896 "file is not ready for writing");
897 goto error;
898 }
Georg Brandl3335a7a2006-08-14 21:42:55 +0000899
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000900 islist = PyList_Check(seq);
901 if (!islist) {
902 iter = PyObject_GetIter(seq);
903 if (iter == NULL) {
904 PyErr_SetString(PyExc_TypeError,
905 "writelines() requires an iterable argument");
906 goto error;
907 }
908 list = PyList_New(CHUNKSIZE);
909 if (list == NULL)
910 goto error;
911 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000912
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000913 /* Strategy: slurp CHUNKSIZE lines into a private list,
914 checking that they are all strings, then write that list
915 without holding the interpreter lock, then come back for more. */
916 for (index = 0; ; index += CHUNKSIZE) {
917 if (islist) {
918 Py_XDECREF(list);
919 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
920 if (list == NULL)
921 goto error;
922 j = PyList_GET_SIZE(list);
923 }
924 else {
925 for (j = 0; j < CHUNKSIZE; j++) {
926 line = PyIter_Next(iter);
927 if (line == NULL) {
928 if (PyErr_Occurred())
929 goto error;
930 break;
931 }
932 PyList_SetItem(list, j, line);
933 }
934 }
935 if (j == 0)
936 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000937
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000938 /* Check that all entries are indeed strings. If not,
939 apply the same rules as for file.write() and
940 convert the rets to strings. This is slow, but
941 seems to be the only way since all conversion APIs
942 could potentially execute Python code. */
943 for (i = 0; i < j; i++) {
944 PyObject *v = PyList_GET_ITEM(list, i);
945 if (!PyString_Check(v)) {
946 const char *buffer;
947 Py_ssize_t len;
948 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
949 PyErr_SetString(PyExc_TypeError,
950 "writelines() "
951 "argument must be "
952 "a sequence of "
953 "strings");
954 goto error;
955 }
956 line = PyString_FromStringAndSize(buffer,
957 len);
958 if (line == NULL)
959 goto error;
960 Py_DECREF(v);
961 PyList_SET_ITEM(list, i, line);
962 }
963 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000964
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000965 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000966
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000967 /* Since we are releasing the global lock, the
968 following code may *not* execute Python code. */
969 Py_BEGIN_ALLOW_THREADS
970 for (i = 0; i < j; i++) {
971 line = PyList_GET_ITEM(list, i);
972 len = PyString_GET_SIZE(line);
973 BZ2_bzWrite (&bzerror, self->fp,
974 PyString_AS_STRING(line), len);
975 if (bzerror != BZ_OK) {
976 Py_BLOCK_THREADS
977 Util_CatchBZ2Error(bzerror);
978 goto error;
979 }
980 }
981 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000982
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000983 if (j < CHUNKSIZE)
984 break;
985 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000987 Py_INCREF(Py_None);
988 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000989
990 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000991 RELEASE_LOCK(self);
992 Py_XDECREF(list);
993 Py_XDECREF(iter);
994 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000995#undef CHUNKSIZE
996}
997
998PyDoc_STRVAR(BZ2File_seek__doc__,
999"seek(offset [, whence]) -> None\n\
1000\n\
1001Move to new file position. Argument offset is a byte count. Optional\n\
1002argument whence defaults to 0 (offset from start of file, offset\n\
1003should be >= 0); other values are 1 (move relative to current position,\n\
1004positive or negative), and 2 (move relative to end of file, usually\n\
1005negative, although many platforms allow seeking beyond the end of a file).\n\
1006\n\
1007Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1008the operation may be extremely slow.\n\
1009");
1010
1011static PyObject *
1012BZ2File_seek(BZ2FileObject *self, PyObject *args)
1013{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001014 int where = 0;
1015 PyObject *offobj;
1016 Py_off_t offset;
1017 char small_buffer[SMALLCHUNK];
1018 char *buffer = small_buffer;
1019 size_t buffersize = SMALLCHUNK;
1020 Py_off_t bytesread = 0;
1021 size_t readsize;
1022 int chunksize;
1023 int bzerror;
1024 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001025
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001026 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1027 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001028#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001029 offset = PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001030#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001031 offset = PyLong_Check(offobj) ?
1032 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001033#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001034 if (PyErr_Occurred())
1035 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001037 ACQUIRE_LOCK(self);
1038 Util_DropReadAhead(self);
1039 switch (self->mode) {
1040 case MODE_READ:
1041 case MODE_READ_EOF:
1042 break;
Tim Peterse3228092002-11-09 04:21:44 +00001043
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001044 case MODE_CLOSED:
1045 PyErr_SetString(PyExc_ValueError,
1046 "I/O operation on closed file");
1047 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001048
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001049 default:
1050 PyErr_SetString(PyExc_IOError,
1051 "seek works only while reading");
1052 goto cleanup;
1053 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001054
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001055 if (where == 2) {
1056 if (self->size == -1) {
1057 assert(self->mode != MODE_READ_EOF);
1058 for (;;) {
1059 Py_BEGIN_ALLOW_THREADS
1060 chunksize = Util_UnivNewlineRead(
1061 &bzerror, self->fp,
1062 buffer, buffersize,
1063 self);
1064 self->pos += chunksize;
1065 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001066
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001067 bytesread += chunksize;
1068 if (bzerror == BZ_STREAM_END) {
1069 break;
1070 } else if (bzerror != BZ_OK) {
1071 Util_CatchBZ2Error(bzerror);
1072 goto cleanup;
1073 }
1074 }
1075 self->mode = MODE_READ_EOF;
1076 self->size = self->pos;
1077 bytesread = 0;
1078 }
1079 offset = self->size + offset;
1080 } else if (where == 1) {
1081 offset = self->pos + offset;
1082 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001083
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001084 /* Before getting here, offset must be the absolute position the file
1085 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +00001086
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001087 if (offset >= self->pos) {
1088 /* we can move forward */
1089 offset -= self->pos;
1090 } else {
1091 /* we cannot move back, so rewind the stream */
1092 BZ2_bzReadClose(&bzerror, self->fp);
1093 if (self->fp) {
1094 PyFile_DecUseCount((PyFileObject *)self->file);
1095 self->fp = NULL;
1096 }
1097 if (bzerror != BZ_OK) {
1098 Util_CatchBZ2Error(bzerror);
1099 goto cleanup;
1100 }
1101 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1102 if (!ret)
1103 goto cleanup;
1104 Py_DECREF(ret);
1105 ret = NULL;
1106 self->pos = 0;
1107 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1108 0, 0, NULL, 0);
1109 if (self->fp)
1110 PyFile_IncUseCount((PyFileObject *)self->file);
1111 if (bzerror != BZ_OK) {
1112 Util_CatchBZ2Error(bzerror);
1113 goto cleanup;
1114 }
1115 self->mode = MODE_READ;
1116 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001117
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001118 if (offset <= 0 || self->mode == MODE_READ_EOF)
1119 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001120
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001121 /* Before getting here, offset must be set to the number of bytes
1122 * to walk forward. */
1123 for (;;) {
1124 if (offset-bytesread > buffersize)
1125 readsize = buffersize;
1126 else
1127 /* offset might be wider that readsize, but the result
1128 * of the subtraction is bound by buffersize (see the
1129 * condition above). buffersize is 8192. */
1130 readsize = (size_t)(offset-bytesread);
1131 Py_BEGIN_ALLOW_THREADS
1132 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1133 buffer, readsize, self);
1134 self->pos += chunksize;
1135 Py_END_ALLOW_THREADS
1136 bytesread += chunksize;
1137 if (bzerror == BZ_STREAM_END) {
1138 self->size = self->pos;
1139 self->mode = MODE_READ_EOF;
1140 break;
1141 } else if (bzerror != BZ_OK) {
1142 Util_CatchBZ2Error(bzerror);
1143 goto cleanup;
1144 }
1145 if (bytesread == offset)
1146 break;
1147 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001148
1149exit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 Py_INCREF(Py_None);
1151 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001152
1153cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 RELEASE_LOCK(self);
1155 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001156}
1157
1158PyDoc_STRVAR(BZ2File_tell__doc__,
1159"tell() -> int\n\
1160\n\
1161Return the current file position, an integer (may be a long integer).\n\
1162");
1163
1164static PyObject *
1165BZ2File_tell(BZ2FileObject *self, PyObject *args)
1166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001167 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001168
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001169 if (self->mode == MODE_CLOSED) {
1170 PyErr_SetString(PyExc_ValueError,
1171 "I/O operation on closed file");
1172 goto cleanup;
1173 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001174
Georg Brandla8bcecc2005-09-03 07:49:53 +00001175#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001176 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001177#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001178 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001179#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001180
1181cleanup:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001182 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001183}
1184
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001185PyDoc_STRVAR(BZ2File_close__doc__,
1186"close() -> None or (perhaps) an integer\n\
1187\n\
1188Close the file. Sets data attribute .closed to true. A closed file\n\
1189cannot be used for further I/O operations. close() may be called more\n\
1190than once without error.\n\
1191");
1192
1193static PyObject *
1194BZ2File_close(BZ2FileObject *self)
1195{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001196 PyObject *ret = NULL;
1197 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001198
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001199 ACQUIRE_LOCK(self);
1200 switch (self->mode) {
1201 case MODE_READ:
1202 case MODE_READ_EOF:
1203 BZ2_bzReadClose(&bzerror, self->fp);
1204 break;
1205 case MODE_WRITE:
1206 BZ2_bzWriteClose(&bzerror, self->fp,
1207 0, NULL, NULL);
1208 break;
1209 }
1210 if (self->fp) {
1211 PyFile_DecUseCount((PyFileObject *)self->file);
1212 self->fp = NULL;
1213 }
1214 self->mode = MODE_CLOSED;
1215 ret = PyObject_CallMethod(self->file, "close", NULL);
1216 if (bzerror != BZ_OK) {
1217 Util_CatchBZ2Error(bzerror);
1218 Py_XDECREF(ret);
1219 ret = NULL;
1220 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001221
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001222 RELEASE_LOCK(self);
1223 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001224}
1225
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001226PyDoc_STRVAR(BZ2File_enter_doc,
1227"__enter__() -> self.");
1228
1229static PyObject *
1230BZ2File_enter(BZ2FileObject *self)
1231{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001232 if (self->mode == MODE_CLOSED) {
1233 PyErr_SetString(PyExc_ValueError,
1234 "I/O operation on closed file");
1235 return NULL;
1236 }
1237 Py_INCREF(self);
1238 return (PyObject *) self;
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001239}
1240
1241PyDoc_STRVAR(BZ2File_exit_doc,
1242"__exit__(*excinfo) -> None. Closes the file.");
1243
1244static PyObject *
1245BZ2File_exit(BZ2FileObject *self, PyObject *args)
1246{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001247 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1248 if (!ret)
1249 /* If error occurred, pass through */
1250 return NULL;
1251 Py_DECREF(ret);
1252 Py_RETURN_NONE;
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001253}
1254
1255
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001256static PyObject *BZ2File_getiter(BZ2FileObject *self);
1257
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001258static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001259 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1260 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1261 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1262 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1263 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1264 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1265 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1266 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1267 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1268 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1269 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1270 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001271};
1272
1273
1274/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001275/* Getters and setters of BZ2File. */
1276
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001277/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1278static PyObject *
1279BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1280{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001281 switch (self->f_newlinetypes) {
1282 case NEWLINE_UNKNOWN:
1283 Py_INCREF(Py_None);
1284 return Py_None;
1285 case NEWLINE_CR:
1286 return PyString_FromString("\r");
1287 case NEWLINE_LF:
1288 return PyString_FromString("\n");
1289 case NEWLINE_CR|NEWLINE_LF:
1290 return Py_BuildValue("(ss)", "\r", "\n");
1291 case NEWLINE_CRLF:
1292 return PyString_FromString("\r\n");
1293 case NEWLINE_CR|NEWLINE_CRLF:
1294 return Py_BuildValue("(ss)", "\r", "\r\n");
1295 case NEWLINE_LF|NEWLINE_CRLF:
1296 return Py_BuildValue("(ss)", "\n", "\r\n");
1297 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1298 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1299 default:
1300 PyErr_Format(PyExc_SystemError,
1301 "Unknown newlines value 0x%x\n",
1302 self->f_newlinetypes);
1303 return NULL;
1304 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001305}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001306
1307static PyObject *
1308BZ2File_get_closed(BZ2FileObject *self, void *closure)
1309{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001310 return PyInt_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001311}
1312
1313static PyObject *
1314BZ2File_get_mode(BZ2FileObject *self, void *closure)
1315{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001316 return PyObject_GetAttrString(self->file, "mode");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001317}
1318
1319static PyObject *
1320BZ2File_get_name(BZ2FileObject *self, void *closure)
1321{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001322 return PyObject_GetAttrString(self->file, "name");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001323}
1324
1325static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001326 {"closed", (getter)BZ2File_get_closed, NULL,
1327 "True if the file is closed"},
1328 {"newlines", (getter)BZ2File_get_newlines, NULL,
1329 "end-of-line convention used in this file"},
1330 {"mode", (getter)BZ2File_get_mode, NULL,
1331 "file mode ('r', 'w', or 'U')"},
1332 {"name", (getter)BZ2File_get_name, NULL,
1333 "file name"},
1334 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001335};
1336
1337
1338/* ===================================================================== */
1339/* Members of BZ2File_Type. */
1340
1341#undef OFF
1342#define OFF(x) offsetof(BZ2FileObject, x)
1343
1344static PyMemberDef BZ2File_members[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001345 {"softspace", T_INT, OFF(f_softspace), 0,
1346 "flag indicating that a space needs to be printed; used by print"},
1347 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001348};
1349
1350/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001351/* Slot definitions for BZ2File_Type. */
1352
1353static int
1354BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1355{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001356 static char *kwlist[] = {"filename", "mode", "buffering",
1357 "compresslevel", 0};
1358 PyObject *name;
1359 char *mode = "r";
1360 int buffering = -1;
1361 int compresslevel = 9;
1362 int bzerror;
1363 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001364
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001365 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001366
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001367 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1368 kwlist, &name, &mode, &buffering,
1369 &compresslevel))
1370 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001371
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001372 if (compresslevel < 1 || compresslevel > 9) {
1373 PyErr_SetString(PyExc_ValueError,
1374 "compresslevel must be between 1 and 9");
1375 return -1;
1376 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001377
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001378 for (;;) {
1379 int error = 0;
1380 switch (*mode) {
1381 case 'r':
1382 case 'w':
1383 if (mode_char)
1384 error = 1;
1385 mode_char = *mode;
1386 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001387
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001388 case 'b':
1389 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001390
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001391 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001392#ifdef __VMS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001393 self->f_univ_newline = 0;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001394#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001395 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001396#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001397 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001398
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001399 default:
1400 error = 1;
1401 break;
1402 }
1403 if (error) {
1404 PyErr_Format(PyExc_ValueError,
1405 "invalid mode char %c", *mode);
1406 return -1;
1407 }
1408 mode++;
1409 if (*mode == '\0')
1410 break;
1411 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001412
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001413 if (mode_char == 0) {
1414 mode_char = 'r';
1415 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001416
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001417 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001418
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001419 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1420 name, mode, buffering);
1421 if (self->file == NULL)
1422 return -1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001423
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001424 /* From now on, we have stuff to dealloc, so jump to error label
1425 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001426
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001427#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001428 self->lock = PyThread_allocate_lock();
1429 if (!self->lock) {
1430 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1431 goto error;
1432 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001433#endif
1434
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001435 if (mode_char == 'r')
1436 self->fp = BZ2_bzReadOpen(&bzerror,
1437 PyFile_AsFile(self->file),
1438 0, 0, NULL, 0);
1439 else
1440 self->fp = BZ2_bzWriteOpen(&bzerror,
1441 PyFile_AsFile(self->file),
1442 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001443
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001444 if (bzerror != BZ_OK) {
1445 Util_CatchBZ2Error(bzerror);
1446 goto error;
1447 }
1448 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001449
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001450 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001451
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001452 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001453
1454error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001455 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001456#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001457 if (self->lock) {
1458 PyThread_free_lock(self->lock);
1459 self->lock = NULL;
1460 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001461#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001462 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001463}
1464
1465static void
1466BZ2File_dealloc(BZ2FileObject *self)
1467{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001468 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001469#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001470 if (self->lock)
1471 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001472#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001473 switch (self->mode) {
1474 case MODE_READ:
1475 case MODE_READ_EOF:
1476 BZ2_bzReadClose(&bzerror, self->fp);
1477 break;
1478 case MODE_WRITE:
1479 BZ2_bzWriteClose(&bzerror, self->fp,
1480 0, NULL, NULL);
1481 break;
1482 }
1483 if (self->fp) {
1484 PyFile_DecUseCount((PyFileObject *)self->file);
1485 self->fp = NULL;
1486 }
1487 Util_DropReadAhead(self);
1488 Py_XDECREF(self->file);
1489 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001490}
1491
1492/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1493static PyObject *
1494BZ2File_getiter(BZ2FileObject *self)
1495{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001496 if (self->mode == MODE_CLOSED) {
1497 PyErr_SetString(PyExc_ValueError,
1498 "I/O operation on closed file");
1499 return NULL;
1500 }
1501 Py_INCREF((PyObject*)self);
1502 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001503}
1504
1505/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1506#define READAHEAD_BUFSIZE 8192
1507static PyObject *
1508BZ2File_iternext(BZ2FileObject *self)
1509{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001510 PyStringObject* ret;
1511 ACQUIRE_LOCK(self);
1512 if (self->mode == MODE_CLOSED) {
1513 RELEASE_LOCK(self);
1514 PyErr_SetString(PyExc_ValueError,
1515 "I/O operation on closed file");
1516 return NULL;
1517 }
1518 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1519 RELEASE_LOCK(self);
1520 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1521 Py_XDECREF(ret);
1522 return NULL;
1523 }
1524 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001525}
1526
1527/* ===================================================================== */
1528/* BZ2File_Type definition. */
1529
1530PyDoc_VAR(BZ2File__doc__) =
1531PyDoc_STR(
1532"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1533\n\
1534Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1535writing. When opened for writing, the file will be created if it doesn't\n\
1536exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1537unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1538is given, must be a number between 1 and 9.\n\
1539")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001540PyDoc_STR(
1541"\n\
1542Add a 'U' to mode to open the file for input with universal newline\n\
1543support. Any line ending in the input file will be seen as a '\\n' in\n\
1544Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1545for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1546'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1547newlines are available only when reading.\n\
1548")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001549;
1550
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001551static PyTypeObject BZ2File_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001552 PyVarObject_HEAD_INIT(NULL, 0)
1553 "bz2.BZ2File", /*tp_name*/
1554 sizeof(BZ2FileObject), /*tp_basicsize*/
1555 0, /*tp_itemsize*/
1556 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1557 0, /*tp_print*/
1558 0, /*tp_getattr*/
1559 0, /*tp_setattr*/
1560 0, /*tp_compare*/
1561 0, /*tp_repr*/
1562 0, /*tp_as_number*/
1563 0, /*tp_as_sequence*/
1564 0, /*tp_as_mapping*/
1565 0, /*tp_hash*/
1566 0, /*tp_call*/
1567 0, /*tp_str*/
1568 PyObject_GenericGetAttr,/*tp_getattro*/
1569 PyObject_GenericSetAttr,/*tp_setattro*/
1570 0, /*tp_as_buffer*/
1571 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1572 BZ2File__doc__, /*tp_doc*/
1573 0, /*tp_traverse*/
1574 0, /*tp_clear*/
1575 0, /*tp_richcompare*/
1576 0, /*tp_weaklistoffset*/
1577 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1578 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1579 BZ2File_methods, /*tp_methods*/
1580 BZ2File_members, /*tp_members*/
1581 BZ2File_getset, /*tp_getset*/
1582 0, /*tp_base*/
1583 0, /*tp_dict*/
1584 0, /*tp_descr_get*/
1585 0, /*tp_descr_set*/
1586 0, /*tp_dictoffset*/
1587 (initproc)BZ2File_init, /*tp_init*/
1588 PyType_GenericAlloc, /*tp_alloc*/
1589 PyType_GenericNew, /*tp_new*/
1590 _PyObject_Del, /*tp_free*/
1591 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001592};
1593
1594
1595/* ===================================================================== */
1596/* Methods of BZ2Comp. */
1597
1598PyDoc_STRVAR(BZ2Comp_compress__doc__,
1599"compress(data) -> string\n\
1600\n\
1601Provide more data to the compressor object. It will return chunks of\n\
1602compressed data whenever possible. When you've finished providing data\n\
1603to compress, call the flush() method to finish the compression process,\n\
1604and return what is left in the internal buffers.\n\
1605");
1606
1607static PyObject *
1608BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1609{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001610 Py_buffer pdata;
1611 char *data;
1612 int datasize;
1613 int bufsize = SMALLCHUNK;
1614 PY_LONG_LONG totalout;
1615 PyObject *ret = NULL;
1616 bz_stream *bzs = &self->bzs;
1617 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001618
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001619 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1620 return NULL;
1621 data = pdata.buf;
1622 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001623
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001624 if (datasize == 0) {
1625 PyBuffer_Release(&pdata);
1626 return PyString_FromString("");
1627 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001628
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001629 ACQUIRE_LOCK(self);
1630 if (!self->running) {
1631 PyErr_SetString(PyExc_ValueError,
1632 "this object was already flushed");
1633 goto error;
1634 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001635
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001636 ret = PyString_FromStringAndSize(NULL, bufsize);
1637 if (!ret)
1638 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001639
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001640 bzs->next_in = data;
1641 bzs->avail_in = datasize;
1642 bzs->next_out = BUF(ret);
1643 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001644
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001645 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001646
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001647 for (;;) {
1648 Py_BEGIN_ALLOW_THREADS
1649 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1650 Py_END_ALLOW_THREADS
1651 if (bzerror != BZ_RUN_OK) {
1652 Util_CatchBZ2Error(bzerror);
1653 goto error;
1654 }
1655 if (bzs->avail_in == 0)
1656 break; /* no more input data */
1657 if (bzs->avail_out == 0) {
1658 bufsize = Util_NewBufferSize(bufsize);
1659 if (_PyString_Resize(&ret, bufsize) < 0) {
1660 BZ2_bzCompressEnd(bzs);
1661 goto error;
1662 }
1663 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1664 - totalout);
1665 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1666 }
1667 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001668
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001669 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001670
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001671 RELEASE_LOCK(self);
1672 PyBuffer_Release(&pdata);
1673 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001674
1675error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001676 RELEASE_LOCK(self);
1677 PyBuffer_Release(&pdata);
1678 Py_XDECREF(ret);
1679 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001680}
1681
1682PyDoc_STRVAR(BZ2Comp_flush__doc__,
1683"flush() -> string\n\
1684\n\
1685Finish the compression process and return what is left in internal buffers.\n\
1686You must not use the compressor object after calling this method.\n\
1687");
1688
1689static PyObject *
1690BZ2Comp_flush(BZ2CompObject *self)
1691{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001692 int bufsize = SMALLCHUNK;
1693 PyObject *ret = NULL;
1694 bz_stream *bzs = &self->bzs;
1695 PY_LONG_LONG totalout;
1696 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001697
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001698 ACQUIRE_LOCK(self);
1699 if (!self->running) {
1700 PyErr_SetString(PyExc_ValueError, "object was already "
1701 "flushed");
1702 goto error;
1703 }
1704 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001705
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001706 ret = PyString_FromStringAndSize(NULL, bufsize);
1707 if (!ret)
1708 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001709
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001710 bzs->next_out = BUF(ret);
1711 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001712
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001713 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001714
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001715 for (;;) {
1716 Py_BEGIN_ALLOW_THREADS
1717 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1718 Py_END_ALLOW_THREADS
1719 if (bzerror == BZ_STREAM_END) {
1720 break;
1721 } else if (bzerror != BZ_FINISH_OK) {
1722 Util_CatchBZ2Error(bzerror);
1723 goto error;
1724 }
1725 if (bzs->avail_out == 0) {
1726 bufsize = Util_NewBufferSize(bufsize);
1727 if (_PyString_Resize(&ret, bufsize) < 0)
1728 goto error;
1729 bzs->next_out = BUF(ret);
1730 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1731 - totalout);
1732 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1733 }
1734 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001735
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001736 if (bzs->avail_out != 0)
1737 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001738
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001739 RELEASE_LOCK(self);
1740 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001741
1742error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001743 RELEASE_LOCK(self);
1744 Py_XDECREF(ret);
1745 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001746}
1747
1748static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001749 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1750 BZ2Comp_compress__doc__},
1751 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1752 BZ2Comp_flush__doc__},
1753 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001754};
1755
1756
1757/* ===================================================================== */
1758/* Slot definitions for BZ2Comp_Type. */
1759
1760static int
1761BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1762{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001763 int compresslevel = 9;
1764 int bzerror;
1765 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001766
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001767 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1768 kwlist, &compresslevel))
1769 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001770
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001771 if (compresslevel < 1 || compresslevel > 9) {
1772 PyErr_SetString(PyExc_ValueError,
1773 "compresslevel must be between 1 and 9");
1774 goto error;
1775 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001776
1777#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001778 self->lock = PyThread_allocate_lock();
1779 if (!self->lock) {
1780 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1781 goto error;
1782 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001783#endif
1784
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001785 memset(&self->bzs, 0, sizeof(bz_stream));
1786 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1787 if (bzerror != BZ_OK) {
1788 Util_CatchBZ2Error(bzerror);
1789 goto error;
1790 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001791
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001792 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001793
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001794 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001795error:
1796#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001797 if (self->lock) {
1798 PyThread_free_lock(self->lock);
1799 self->lock = NULL;
1800 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001801#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001802 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001803}
1804
1805static void
1806BZ2Comp_dealloc(BZ2CompObject *self)
1807{
1808#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001809 if (self->lock)
1810 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001811#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001812 BZ2_bzCompressEnd(&self->bzs);
1813 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001814}
1815
1816
1817/* ===================================================================== */
1818/* BZ2Comp_Type definition. */
1819
1820PyDoc_STRVAR(BZ2Comp__doc__,
1821"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1822\n\
1823Create a new compressor object. This object may be used to compress\n\
1824data sequentially. If you want to compress data in one shot, use the\n\
1825compress() function instead. The compresslevel parameter, if given,\n\
1826must be a number between 1 and 9.\n\
1827");
1828
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001829static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001830 PyVarObject_HEAD_INIT(NULL, 0)
1831 "bz2.BZ2Compressor", /*tp_name*/
1832 sizeof(BZ2CompObject), /*tp_basicsize*/
1833 0, /*tp_itemsize*/
1834 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1835 0, /*tp_print*/
1836 0, /*tp_getattr*/
1837 0, /*tp_setattr*/
1838 0, /*tp_compare*/
1839 0, /*tp_repr*/
1840 0, /*tp_as_number*/
1841 0, /*tp_as_sequence*/
1842 0, /*tp_as_mapping*/
1843 0, /*tp_hash*/
1844 0, /*tp_call*/
1845 0, /*tp_str*/
1846 PyObject_GenericGetAttr,/*tp_getattro*/
1847 PyObject_GenericSetAttr,/*tp_setattro*/
1848 0, /*tp_as_buffer*/
1849 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1850 BZ2Comp__doc__, /*tp_doc*/
1851 0, /*tp_traverse*/
1852 0, /*tp_clear*/
1853 0, /*tp_richcompare*/
1854 0, /*tp_weaklistoffset*/
1855 0, /*tp_iter*/
1856 0, /*tp_iternext*/
1857 BZ2Comp_methods, /*tp_methods*/
1858 0, /*tp_members*/
1859 0, /*tp_getset*/
1860 0, /*tp_base*/
1861 0, /*tp_dict*/
1862 0, /*tp_descr_get*/
1863 0, /*tp_descr_set*/
1864 0, /*tp_dictoffset*/
1865 (initproc)BZ2Comp_init, /*tp_init*/
1866 PyType_GenericAlloc, /*tp_alloc*/
1867 PyType_GenericNew, /*tp_new*/
1868 _PyObject_Del, /*tp_free*/
1869 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001870};
1871
1872
1873/* ===================================================================== */
1874/* Members of BZ2Decomp. */
1875
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001876#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001877#define OFF(x) offsetof(BZ2DecompObject, x)
1878
1879static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001880 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1881 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001882};
1883
1884
1885/* ===================================================================== */
1886/* Methods of BZ2Decomp. */
1887
1888PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1889"decompress(data) -> string\n\
1890\n\
1891Provide more data to the decompressor object. It will return chunks\n\
1892of decompressed data whenever possible. If you try to decompress data\n\
1893after the end of stream is found, EOFError will be raised. If any data\n\
1894was found after the end of stream, it'll be ignored and saved in\n\
1895unused_data attribute.\n\
1896");
1897
1898static PyObject *
1899BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1900{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001901 Py_buffer pdata;
1902 char *data;
1903 int datasize;
1904 int bufsize = SMALLCHUNK;
1905 PY_LONG_LONG totalout;
1906 PyObject *ret = NULL;
1907 bz_stream *bzs = &self->bzs;
1908 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001909
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001910 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1911 return NULL;
1912 data = pdata.buf;
1913 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001914
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001915 ACQUIRE_LOCK(self);
1916 if (!self->running) {
1917 PyErr_SetString(PyExc_EOFError, "end of stream was "
1918 "already found");
1919 goto error;
1920 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001921
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001922 ret = PyString_FromStringAndSize(NULL, bufsize);
1923 if (!ret)
1924 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001925
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001926 bzs->next_in = data;
1927 bzs->avail_in = datasize;
1928 bzs->next_out = BUF(ret);
1929 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001930
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001931 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001932
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001933 for (;;) {
1934 Py_BEGIN_ALLOW_THREADS
1935 bzerror = BZ2_bzDecompress(bzs);
1936 Py_END_ALLOW_THREADS
1937 if (bzerror == BZ_STREAM_END) {
1938 if (bzs->avail_in != 0) {
1939 Py_DECREF(self->unused_data);
1940 self->unused_data =
1941 PyString_FromStringAndSize(bzs->next_in,
1942 bzs->avail_in);
1943 }
1944 self->running = 0;
1945 break;
1946 }
1947 if (bzerror != BZ_OK) {
1948 Util_CatchBZ2Error(bzerror);
1949 goto error;
1950 }
1951 if (bzs->avail_in == 0)
1952 break; /* no more input data */
1953 if (bzs->avail_out == 0) {
1954 bufsize = Util_NewBufferSize(bufsize);
1955 if (_PyString_Resize(&ret, bufsize) < 0) {
1956 BZ2_bzDecompressEnd(bzs);
1957 goto error;
1958 }
1959 bzs->next_out = BUF(ret);
1960 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1961 - totalout);
1962 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1963 }
1964 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001965
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001966 if (bzs->avail_out != 0)
1967 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001968
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001969 RELEASE_LOCK(self);
1970 PyBuffer_Release(&pdata);
1971 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001972
1973error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001974 RELEASE_LOCK(self);
1975 PyBuffer_Release(&pdata);
1976 Py_XDECREF(ret);
1977 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001978}
1979
1980static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001981 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1982 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001983};
1984
1985
1986/* ===================================================================== */
1987/* Slot definitions for BZ2Decomp_Type. */
1988
1989static int
1990BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001992 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001993
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001994 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1995 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001996
1997#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001998 self->lock = PyThread_allocate_lock();
1999 if (!self->lock) {
2000 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2001 goto error;
2002 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002003#endif
2004
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002005 self->unused_data = PyString_FromString("");
2006 if (!self->unused_data)
2007 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002008
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002009 memset(&self->bzs, 0, sizeof(bz_stream));
2010 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2011 if (bzerror != BZ_OK) {
2012 Util_CatchBZ2Error(bzerror);
2013 goto error;
2014 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002016 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002017
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002018 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002019
2020error:
2021#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002022 if (self->lock) {
2023 PyThread_free_lock(self->lock);
2024 self->lock = NULL;
2025 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002026#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002027 Py_CLEAR(self->unused_data);
2028 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002029}
2030
2031static void
2032BZ2Decomp_dealloc(BZ2DecompObject *self)
2033{
2034#ifdef WITH_THREAD
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002035 if (self->lock)
2036 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002037#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002038 Py_XDECREF(self->unused_data);
2039 BZ2_bzDecompressEnd(&self->bzs);
2040 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002041}
2042
2043
2044/* ===================================================================== */
2045/* BZ2Decomp_Type definition. */
2046
2047PyDoc_STRVAR(BZ2Decomp__doc__,
2048"BZ2Decompressor() -> decompressor object\n\
2049\n\
2050Create a new decompressor object. This object may be used to decompress\n\
2051data sequentially. If you want to decompress data in one shot, use the\n\
2052decompress() function instead.\n\
2053");
2054
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00002055static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002056 PyVarObject_HEAD_INIT(NULL, 0)
2057 "bz2.BZ2Decompressor", /*tp_name*/
2058 sizeof(BZ2DecompObject), /*tp_basicsize*/
2059 0, /*tp_itemsize*/
2060 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2061 0, /*tp_print*/
2062 0, /*tp_getattr*/
2063 0, /*tp_setattr*/
2064 0, /*tp_compare*/
2065 0, /*tp_repr*/
2066 0, /*tp_as_number*/
2067 0, /*tp_as_sequence*/
2068 0, /*tp_as_mapping*/
2069 0, /*tp_hash*/
2070 0, /*tp_call*/
2071 0, /*tp_str*/
2072 PyObject_GenericGetAttr,/*tp_getattro*/
2073 PyObject_GenericSetAttr,/*tp_setattro*/
2074 0, /*tp_as_buffer*/
2075 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2076 BZ2Decomp__doc__, /*tp_doc*/
2077 0, /*tp_traverse*/
2078 0, /*tp_clear*/
2079 0, /*tp_richcompare*/
2080 0, /*tp_weaklistoffset*/
2081 0, /*tp_iter*/
2082 0, /*tp_iternext*/
2083 BZ2Decomp_methods, /*tp_methods*/
2084 BZ2Decomp_members, /*tp_members*/
2085 0, /*tp_getset*/
2086 0, /*tp_base*/
2087 0, /*tp_dict*/
2088 0, /*tp_descr_get*/
2089 0, /*tp_descr_set*/
2090 0, /*tp_dictoffset*/
2091 (initproc)BZ2Decomp_init, /*tp_init*/
2092 PyType_GenericAlloc, /*tp_alloc*/
2093 PyType_GenericNew, /*tp_new*/
2094 _PyObject_Del, /*tp_free*/
2095 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002096};
2097
2098
2099/* ===================================================================== */
2100/* Module functions. */
2101
2102PyDoc_STRVAR(bz2_compress__doc__,
2103"compress(data [, compresslevel=9]) -> string\n\
2104\n\
2105Compress data in one shot. If you want to compress data sequentially,\n\
2106use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2107given, must be a number between 1 and 9.\n\
2108");
2109
2110static PyObject *
2111bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2112{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002113 int compresslevel=9;
2114 Py_buffer pdata;
2115 char *data;
2116 int datasize;
2117 int bufsize;
2118 PyObject *ret = NULL;
2119 bz_stream _bzs;
2120 bz_stream *bzs = &_bzs;
2121 int bzerror;
2122 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002123
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002124 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2125 kwlist, &pdata,
2126 &compresslevel))
2127 return NULL;
2128 data = pdata.buf;
2129 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002130
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002131 if (compresslevel < 1 || compresslevel > 9) {
2132 PyErr_SetString(PyExc_ValueError,
2133 "compresslevel must be between 1 and 9");
2134 PyBuffer_Release(&pdata);
2135 return NULL;
2136 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002137
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002138 /* Conforming to bz2 manual, this is large enough to fit compressed
2139 * data in one shot. We will check it later anyway. */
2140 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002141
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002142 ret = PyString_FromStringAndSize(NULL, bufsize);
2143 if (!ret) {
2144 PyBuffer_Release(&pdata);
2145 return NULL;
2146 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002147
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002148 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002149
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002150 bzs->next_in = data;
2151 bzs->avail_in = datasize;
2152 bzs->next_out = BUF(ret);
2153 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002154
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002155 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2156 if (bzerror != BZ_OK) {
2157 Util_CatchBZ2Error(bzerror);
2158 PyBuffer_Release(&pdata);
2159 Py_DECREF(ret);
2160 return NULL;
2161 }
Tim Peterse3228092002-11-09 04:21:44 +00002162
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002163 for (;;) {
2164 Py_BEGIN_ALLOW_THREADS
2165 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2166 Py_END_ALLOW_THREADS
2167 if (bzerror == BZ_STREAM_END) {
2168 break;
2169 } else if (bzerror != BZ_FINISH_OK) {
2170 BZ2_bzCompressEnd(bzs);
2171 Util_CatchBZ2Error(bzerror);
2172 PyBuffer_Release(&pdata);
2173 Py_DECREF(ret);
2174 return NULL;
2175 }
2176 if (bzs->avail_out == 0) {
2177 bufsize = Util_NewBufferSize(bufsize);
2178 if (_PyString_Resize(&ret, bufsize) < 0) {
2179 BZ2_bzCompressEnd(bzs);
2180 PyBuffer_Release(&pdata);
2181 Py_DECREF(ret);
2182 return NULL;
2183 }
2184 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2185 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2186 }
2187 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002188
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002189 if (bzs->avail_out != 0)
2190 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2191 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002192
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002193 PyBuffer_Release(&pdata);
2194 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002195}
2196
2197PyDoc_STRVAR(bz2_decompress__doc__,
2198"decompress(data) -> decompressed data\n\
2199\n\
2200Decompress data in one shot. If you want to decompress data sequentially,\n\
2201use an instance of BZ2Decompressor instead.\n\
2202");
2203
2204static PyObject *
2205bz2_decompress(PyObject *self, PyObject *args)
2206{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002207 Py_buffer pdata;
2208 char *data;
2209 int datasize;
2210 int bufsize = SMALLCHUNK;
2211 PyObject *ret;
2212 bz_stream _bzs;
2213 bz_stream *bzs = &_bzs;
2214 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002215
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002216 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2217 return NULL;
2218 data = pdata.buf;
2219 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002220
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002221 if (datasize == 0) {
2222 PyBuffer_Release(&pdata);
2223 return PyString_FromString("");
2224 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002225
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002226 ret = PyString_FromStringAndSize(NULL, bufsize);
2227 if (!ret) {
2228 PyBuffer_Release(&pdata);
2229 return NULL;
2230 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002231
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002232 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002233
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002234 bzs->next_in = data;
2235 bzs->avail_in = datasize;
2236 bzs->next_out = BUF(ret);
2237 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002238
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002239 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2240 if (bzerror != BZ_OK) {
2241 Util_CatchBZ2Error(bzerror);
2242 Py_DECREF(ret);
2243 PyBuffer_Release(&pdata);
2244 return NULL;
2245 }
Tim Peterse3228092002-11-09 04:21:44 +00002246
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002247 for (;;) {
2248 Py_BEGIN_ALLOW_THREADS
2249 bzerror = BZ2_bzDecompress(bzs);
2250 Py_END_ALLOW_THREADS
2251 if (bzerror == BZ_STREAM_END) {
2252 break;
2253 } else if (bzerror != BZ_OK) {
2254 BZ2_bzDecompressEnd(bzs);
2255 Util_CatchBZ2Error(bzerror);
2256 PyBuffer_Release(&pdata);
2257 Py_DECREF(ret);
2258 return NULL;
2259 }
2260 if (bzs->avail_in == 0) {
2261 BZ2_bzDecompressEnd(bzs);
2262 PyErr_SetString(PyExc_ValueError,
2263 "couldn't find end of stream");
2264 PyBuffer_Release(&pdata);
2265 Py_DECREF(ret);
2266 return NULL;
2267 }
2268 if (bzs->avail_out == 0) {
2269 bufsize = Util_NewBufferSize(bufsize);
2270 if (_PyString_Resize(&ret, bufsize) < 0) {
2271 BZ2_bzDecompressEnd(bzs);
2272 PyBuffer_Release(&pdata);
2273 Py_DECREF(ret);
2274 return NULL;
2275 }
2276 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2277 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2278 }
2279 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002280
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002281 if (bzs->avail_out != 0)
2282 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2283 BZ2_bzDecompressEnd(bzs);
2284 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002285
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002286 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002287}
2288
2289static PyMethodDef bz2_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002290 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2291 bz2_compress__doc__},
2292 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2293 bz2_decompress__doc__},
2294 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002295};
2296
2297/* ===================================================================== */
2298/* Initialization function. */
2299
2300PyDoc_STRVAR(bz2__doc__,
2301"The python bz2 module provides a comprehensive interface for\n\
2302the bz2 compression library. It implements a complete file\n\
2303interface, one shot (de)compression functions, and types for\n\
2304sequential (de)compression.\n\
2305");
2306
Neal Norwitz21d896c2003-07-01 20:15:21 +00002307PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002308initbz2(void)
2309{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002310 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002311
Antoine Pitrou2c7d6852010-09-23 19:55:24 +00002312 if (PyType_Ready(&BZ2File_Type) < 0)
2313 return;
2314 if (PyType_Ready(&BZ2Comp_Type) < 0)
2315 return;
2316 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2317 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002318
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002319 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2320 if (m == NULL)
2321 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002322
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002323 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002324
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002325 Py_INCREF(&BZ2File_Type);
2326 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002327
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002328 Py_INCREF(&BZ2Comp_Type);
2329 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002330
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002331 Py_INCREF(&BZ2Decomp_Type);
2332 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002333}