blob: c3dae7a671eba8b5ff25bf0c0f7a3d5fdeceb426 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the bz2 module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t: this module's off_t-like type, 64 bits wide when possible.
 * The selection logic is copied from Objects/fileobject.c:
 *   - without large file support, or when off_t is already >= 8 bytes,
 *     plain off_t is used;
 *   - otherwise fpos_t is used if it is >= 8 bytes;
 *   - otherwise the build is rejected.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character buffer of a Python string object.  The argument is
 * parenthesized so that an expression can be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0     /* file has been closed */
#define MODE_READ     1     /* open for reading */
#define MODE_READ_EOF 2     /* reading, and BZ_STREAM_END was reached */
#define MODE_WRITE    3     /* open for writing */

/* Exact type test: true only when v's type is BZ2File_Type itself. */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZ_CONFIG_ERROR is used here as the marker for a libbz2 that exports
 * the BZ2_-prefixed API and splits total_out into hi32/lo32 halves.
 * When it is missing, the unprefixed names are mapped onto the BZ2_ names
 * used throughout this file.
 *
 * BZS_TOTAL_OUT(bzs) yields the total number of output bytes recorded in
 * the bz_stream pointed to by bzs.  The argument is parenthesized so that
 * any pointer expression may be passed.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking.
 *
 * ACQUIRE_LOCK first tries to take obj->lock without blocking.  Only if
 * that fails does it release the interpreter lock and wait, so the
 * uncontended case costs no thread switch.
 * Without WITH_THREAD both macros expand to nothing.
 * The argument is parenthesized so any pointer expression can be passed.
 */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bit flags accumulated in f_newlinetypes */
#define NEWLINE_UNKNOWN 0x0     /* no newline has been seen yet */
#define NEWLINE_CR      0x1     /* a \r newline has been seen */
#define NEWLINE_LF      0x2     /* a \n newline has been seen */
#define NEWLINE_CRLF    0x4     /* a \r\n newline has been seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000103 PyObject_HEAD
104 PyObject *file;
105
106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
109
110 int f_softspace; /* Flag used by 'print' command */
111
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000112 int f_univ_newline; /* Handle any newline convention */
113 int f_newlinetypes; /* Types of newlines seen */
114 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000115
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000116 BZFILE *fp;
117 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000118 Py_off_t pos;
119 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000120#ifdef WITH_THREAD
121 PyThread_type_lock lock;
122#endif
123} BZ2FileObject;
124
125typedef struct {
126 PyObject_HEAD
127 bz_stream bzs;
128 int running;
129#ifdef WITH_THREAD
130 PyThread_type_lock lock;
131#endif
132} BZ2CompObject;
133
134typedef struct {
135 PyObject_HEAD
136 bz_stream bzs;
137 int running;
138 PyObject *unused_data;
139#ifdef WITH_THREAD
140 PyThread_type_lock lock;
141#endif
142} BZ2DecompObject;
143
144/* ===================================================================== */
145/* Utility functions. */
146
147static int
148Util_CatchBZ2Error(int bzerror)
149{
150 int ret = 0;
151 switch(bzerror) {
152 case BZ_OK:
153 case BZ_STREAM_END:
154 break;
155
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000156#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000157 case BZ_CONFIG_ERROR:
158 PyErr_SetString(PyExc_SystemError,
159 "the bz2 library was not compiled "
160 "correctly");
161 ret = 1;
162 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000163#endif
Tim Peterse3228092002-11-09 04:21:44 +0000164
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000165 case BZ_PARAM_ERROR:
166 PyErr_SetString(PyExc_ValueError,
167 "the bz2 library has received wrong "
168 "parameters");
169 ret = 1;
170 break;
Tim Peterse3228092002-11-09 04:21:44 +0000171
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000172 case BZ_MEM_ERROR:
173 PyErr_NoMemory();
174 ret = 1;
175 break;
176
177 case BZ_DATA_ERROR:
178 case BZ_DATA_ERROR_MAGIC:
179 PyErr_SetString(PyExc_IOError, "invalid data stream");
180 ret = 1;
181 break;
182
183 case BZ_IO_ERROR:
184 PyErr_SetString(PyExc_IOError, "unknown IO error");
185 ret = 1;
186 break;
187
188 case BZ_UNEXPECTED_EOF:
189 PyErr_SetString(PyExc_EOFError,
190 "compressed file ended before the "
191 "logical end-of-stream was detected");
192 ret = 1;
193 break;
194
195 case BZ_SEQUENCE_ERROR:
196 PyErr_SetString(PyExc_RuntimeError,
197 "wrong sequence of bz2 library "
198 "commands used");
199 ret = 1;
200 break;
201 }
202 return ret;
203}
204
/* SMALLCHUNK: linear growth step for small buffers; at least 8192 bytes,
 * or BUFSIZ when that is larger. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK: linear growth step for large buffers; reduced when int is
 * narrower than 4 bytes. */
#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK, up to and including BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;

    return currentsize + growth;
}
231
232/* This is a hacked version of Python's fileobject.c:get_line(). */
233static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000234Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000235{
236 char c;
237 char *buf, *end;
238 size_t total_v_size; /* total # of slots in buffer */
239 size_t used_v_size; /* # used slots in buffer */
240 size_t increment; /* amount to increment the buffer */
241 PyObject *v;
242 int bzerror;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000243 int bytes_read;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000244 int newlinetypes = f->f_newlinetypes;
245 int skipnextlf = f->f_skipnextlf;
246 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000247
248 total_v_size = n > 0 ? n : 100;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000249 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000250 if (v == NULL)
251 return NULL;
252
253 buf = BUF(v);
254 end = buf + total_v_size;
255
256 for (;;) {
257 Py_BEGIN_ALLOW_THREADS
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000258 while (buf != end) {
259 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
260 f->pos++;
261 if (bytes_read == 0) break;
262 if (univ_newline) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000263 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000264 skipnextlf = 0;
265 if (c == '\n') {
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000266 /* Seeing a \n here with skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000267 * saw a \r before.
268 */
269 newlinetypes |= NEWLINE_CRLF;
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000270 if (bzerror != BZ_OK) break;
271 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
272 f->pos++;
273 if (bytes_read == 0) break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000274 } else {
275 newlinetypes |= NEWLINE_CR;
276 }
277 }
278 if (c == '\r') {
279 skipnextlf = 1;
280 c = '\n';
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000281 } else if (c == '\n')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000282 newlinetypes |= NEWLINE_LF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000283 }
Sean Reifscheider8335acb2007-09-17 05:45:04 +0000284 *buf++ = c;
285 if (bzerror != BZ_OK || c == '\n') break;
286 }
287 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
288 newlinetypes |= NEWLINE_CR;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000289 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000290 f->f_newlinetypes = newlinetypes;
291 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000292 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000293 f->size = f->pos;
294 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000295 break;
296 } else if (bzerror != BZ_OK) {
297 Util_CatchBZ2Error(bzerror);
298 Py_DECREF(v);
299 return NULL;
300 }
301 if (c == '\n')
302 break;
303 /* Must be because buf == end */
304 if (n > 0)
305 break;
306 used_v_size = total_v_size;
307 increment = total_v_size >> 2; /* mild exponential growth */
308 total_v_size += increment;
309 if (total_v_size > INT_MAX) {
310 PyErr_SetString(PyExc_OverflowError,
311 "line is longer than a Python string can hold");
312 Py_DECREF(v);
313 return NULL;
314 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000315 if (_PyString_Resize(&v, total_v_size) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000316 return NULL;
317 buf = BUF(v) + used_v_size;
318 end = BUF(v) + total_v_size;
319 }
320
321 used_v_size = buf - BUF(v);
322 if (used_v_size != total_v_size)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000323 _PyString_Resize(&v, used_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000324 return v;
325}
326
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327/* This is a hacked version of Python's
328 * fileobject.c:Py_UniversalNewlineFread(). */
329size_t
330Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000331 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000332{
333 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000334 int newlinetypes, skipnextlf;
335
336 assert(buf != NULL);
337 assert(stream != NULL);
338
339 if (!f->f_univ_newline)
340 return BZ2_bzRead(bzerror, stream, buf, n);
341
342 newlinetypes = f->f_newlinetypes;
343 skipnextlf = f->f_skipnextlf;
344
345 /* Invariant: n is the number of bytes remaining to be filled
346 * in the buffer.
347 */
348 while (n) {
349 size_t nread;
350 int shortread;
351 char *src = dst;
352
353 nread = BZ2_bzRead(bzerror, stream, dst, n);
354 assert(nread <= n);
355 n -= nread; /* assuming 1 byte out for each in; will adjust */
356 shortread = n != 0; /* true iff EOF or error */
357 while (nread--) {
358 char c = *src++;
359 if (c == '\r') {
360 /* Save as LF and set flag to skip next LF. */
361 *dst++ = '\n';
362 skipnextlf = 1;
363 }
364 else if (skipnextlf && c == '\n') {
365 /* Skip LF, and remember we saw CR LF. */
366 skipnextlf = 0;
367 newlinetypes |= NEWLINE_CRLF;
368 ++n;
369 }
370 else {
371 /* Normal char to be stored in buffer. Also
372 * update the newlinetypes flag if either this
373 * is an LF or the previous char was a CR.
374 */
375 if (c == '\n')
376 newlinetypes |= NEWLINE_LF;
377 else if (skipnextlf)
378 newlinetypes |= NEWLINE_CR;
379 *dst++ = c;
380 skipnextlf = 0;
381 }
382 }
383 if (shortread) {
384 /* If this is EOF, update type flags. */
385 if (skipnextlf && *bzerror == BZ_STREAM_END)
386 newlinetypes |= NEWLINE_CR;
387 break;
388 }
389 }
390 f->f_newlinetypes = newlinetypes;
391 f->f_skipnextlf = skipnextlf;
392 return dst - buf;
393}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000394
395/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
396static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000397Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000398{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399 if (f->f_buf != NULL) {
400 PyMem_Free(f->f_buf);
401 f->f_buf = NULL;
402 }
403}
404
405/* This is a hacked version of Python's fileobject.c:readahead(). */
406static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000407Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000408{
409 int chunksize;
410 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000411
412 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000413 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000414 return 0;
415 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000416 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000417 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000418 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000419 f->f_bufptr = f->f_buf;
420 f->f_bufend = f->f_buf;
421 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000422 }
423 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
Antoine Pitrou016b3662008-08-09 17:22:25 +0000424 PyErr_NoMemory();
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000450 PyStringObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000518 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000566 if (_PyString_Resize(&ret, buffersize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000573 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000603 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000616 ret = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000699 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000705 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000710 big_buffer = PyString_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000714 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000721 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000722 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000726 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000727 /* Process complete lines */
728 p++;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000729 line = PyString_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000738 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000752 line = PyString_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000762 PyString_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000781PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782"xreadlines() -> self\n\
783\n\
784For backward compatibility. BZ2File objects now include the performance\n\
785optimizations previously implemented in the xreadlines module.\n\
786");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
799 PyObject *ret = NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000800 Py_buffer pbuf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000801 char *buf;
802 int len;
803 int bzerror;
804
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000805 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000806 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000807 buf = pbuf.buf;
808 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000809
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810 ACQUIRE_LOCK(self);
811 switch (self->mode) {
812 case MODE_WRITE:
813 break;
Tim Peterse3228092002-11-09 04:21:44 +0000814
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000815 case MODE_CLOSED:
816 PyErr_SetString(PyExc_ValueError,
817 "I/O operation on closed file");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000818 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000819
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000820 default:
821 PyErr_SetString(PyExc_IOError,
822 "file is not ready for writing");
Georg Brandl3335a7a2006-08-14 21:42:55 +0000823 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824 }
825
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000826 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000827
828 Py_BEGIN_ALLOW_THREADS
829 BZ2_bzWrite (&bzerror, self->fp, buf, len);
830 self->pos += len;
831 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000832
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000833 if (bzerror != BZ_OK) {
834 Util_CatchBZ2Error(bzerror);
835 goto cleanup;
836 }
Tim Peterse3228092002-11-09 04:21:44 +0000837
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000838 Py_INCREF(Py_None);
839 ret = Py_None;
840
841cleanup:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000842 PyBuffer_Release(&pbuf);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000843 RELEASE_LOCK(self);
844 return ret;
845}
846
847PyDoc_STRVAR(BZ2File_writelines__doc__,
848"writelines(sequence_of_strings) -> None\n\
849\n\
850Write the sequence of strings to the file. Note that newlines are not\n\
851added. The sequence can be any iterable object producing strings. This is\n\
852equivalent to calling write() for each string.\n\
853");
854
855/* This is a hacked version of Python's fileobject.c:file_writelines(). */
856static PyObject *
857BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
858{
859#define CHUNKSIZE 1000
860 PyObject *list = NULL;
861 PyObject *iter = NULL;
862 PyObject *ret = NULL;
863 PyObject *line;
864 int i, j, index, len, islist;
865 int bzerror;
866
867 ACQUIRE_LOCK(self);
Georg Brandl3335a7a2006-08-14 21:42:55 +0000868 switch (self->mode) {
869 case MODE_WRITE:
870 break;
871
872 case MODE_CLOSED:
873 PyErr_SetString(PyExc_ValueError,
874 "I/O operation on closed file");
875 goto error;
876
877 default:
878 PyErr_SetString(PyExc_IOError,
879 "file is not ready for writing");
880 goto error;
881 }
882
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000883 islist = PyList_Check(seq);
884 if (!islist) {
885 iter = PyObject_GetIter(seq);
886 if (iter == NULL) {
887 PyErr_SetString(PyExc_TypeError,
888 "writelines() requires an iterable argument");
889 goto error;
890 }
891 list = PyList_New(CHUNKSIZE);
892 if (list == NULL)
893 goto error;
894 }
895
896 /* Strategy: slurp CHUNKSIZE lines into a private list,
897 checking that they are all strings, then write that list
898 without holding the interpreter lock, then come back for more. */
899 for (index = 0; ; index += CHUNKSIZE) {
900 if (islist) {
901 Py_XDECREF(list);
902 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
903 if (list == NULL)
904 goto error;
905 j = PyList_GET_SIZE(list);
906 }
907 else {
908 for (j = 0; j < CHUNKSIZE; j++) {
909 line = PyIter_Next(iter);
910 if (line == NULL) {
911 if (PyErr_Occurred())
912 goto error;
913 break;
914 }
915 PyList_SetItem(list, j, line);
916 }
917 }
918 if (j == 0)
919 break;
920
921 /* Check that all entries are indeed strings. If not,
922 apply the same rules as for file.write() and
923 convert the rets to strings. This is slow, but
924 seems to be the only way since all conversion APIs
925 could potentially execute Python code. */
926 for (i = 0; i < j; i++) {
927 PyObject *v = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000928 if (!PyString_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000929 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000930 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000931 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
932 PyErr_SetString(PyExc_TypeError,
933 "writelines() "
934 "argument must be "
935 "a sequence of "
936 "strings");
937 goto error;
938 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000939 line = PyString_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000940 len);
941 if (line == NULL)
942 goto error;
943 Py_DECREF(v);
944 PyList_SET_ITEM(list, i, line);
945 }
946 }
947
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000948 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949
950 /* Since we are releasing the global lock, the
951 following code may *not* execute Python code. */
952 Py_BEGIN_ALLOW_THREADS
953 for (i = 0; i < j; i++) {
954 line = PyList_GET_ITEM(list, i);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000955 len = PyString_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000956 BZ2_bzWrite (&bzerror, self->fp,
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000957 PyString_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000958 if (bzerror != BZ_OK) {
959 Py_BLOCK_THREADS
960 Util_CatchBZ2Error(bzerror);
961 goto error;
962 }
963 }
964 Py_END_ALLOW_THREADS
965
966 if (j < CHUNKSIZE)
967 break;
968 }
969
970 Py_INCREF(Py_None);
971 ret = Py_None;
972
973 error:
974 RELEASE_LOCK(self);
975 Py_XDECREF(list);
976 Py_XDECREF(iter);
977 return ret;
978#undef CHUNKSIZE
979}
980
981PyDoc_STRVAR(BZ2File_seek__doc__,
982"seek(offset [, whence]) -> None\n\
983\n\
984Move to new file position. Argument offset is a byte count. Optional\n\
985argument whence defaults to 0 (offset from start of file, offset\n\
986should be >= 0); other values are 1 (move relative to current position,\n\
987positive or negative), and 2 (move relative to end of file, usually\n\
988negative, although many platforms allow seeking beyond the end of a file).\n\
989\n\
990Note that seeking of bz2 files is emulated, and depending on the parameters\n\
991the operation may be extremely slow.\n\
992");
993
994static PyObject *
995BZ2File_seek(BZ2FileObject *self, PyObject *args)
996{
997 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000998 PyObject *offobj;
999 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 char small_buffer[SMALLCHUNK];
1001 char *buffer = small_buffer;
1002 size_t buffersize = SMALLCHUNK;
Andrew M. Kuchling44b054b2006-12-18 19:22:24 +00001003 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +00001004 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001005 int chunksize;
1006 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001007 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001008
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001009 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1010 return NULL;
1011#if !defined(HAVE_LARGEFILE_SUPPORT)
1012 offset = PyInt_AsLong(offobj);
1013#else
1014 offset = PyLong_Check(offobj) ?
1015 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1016#endif
1017 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001018 return NULL;
1019
1020 ACQUIRE_LOCK(self);
1021 Util_DropReadAhead(self);
1022 switch (self->mode) {
1023 case MODE_READ:
1024 case MODE_READ_EOF:
1025 break;
Tim Peterse3228092002-11-09 04:21:44 +00001026
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001027 case MODE_CLOSED:
1028 PyErr_SetString(PyExc_ValueError,
1029 "I/O operation on closed file");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001030 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001031
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001032 default:
1033 PyErr_SetString(PyExc_IOError,
1034 "seek works only while reading");
Neal Norwitzd3f91902006-09-23 04:11:38 +00001035 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001036 }
1037
Georg Brandl47fab922006-02-18 21:57:25 +00001038 if (where == 2) {
1039 if (self->size == -1) {
1040 assert(self->mode != MODE_READ_EOF);
1041 for (;;) {
1042 Py_BEGIN_ALLOW_THREADS
1043 chunksize = Util_UnivNewlineRead(
1044 &bzerror, self->fp,
1045 buffer, buffersize,
1046 self);
1047 self->pos += chunksize;
1048 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001049
Georg Brandl47fab922006-02-18 21:57:25 +00001050 bytesread += chunksize;
1051 if (bzerror == BZ_STREAM_END) {
1052 break;
1053 } else if (bzerror != BZ_OK) {
1054 Util_CatchBZ2Error(bzerror);
1055 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001057 }
Georg Brandl47fab922006-02-18 21:57:25 +00001058 self->mode = MODE_READ_EOF;
1059 self->size = self->pos;
1060 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001061 }
Georg Brandl47fab922006-02-18 21:57:25 +00001062 offset = self->size + offset;
1063 } else if (where == 1) {
1064 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001065 }
1066
Georg Brandl47fab922006-02-18 21:57:25 +00001067 /* Before getting here, offset must be the absolute position the file
1068 * pointer should be set to. */
1069
1070 if (offset >= self->pos) {
1071 /* we can move forward */
1072 offset -= self->pos;
1073 } else {
1074 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001075 BZ2_bzReadClose(&bzerror, self->fp);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001076 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001077 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001078 self->fp = NULL;
1079 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001080 if (bzerror != BZ_OK) {
1081 Util_CatchBZ2Error(bzerror);
1082 goto cleanup;
1083 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001084 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001085 if (!ret)
1086 goto cleanup;
1087 Py_DECREF(ret);
1088 ret = NULL;
1089 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001090 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001091 0, 0, NULL, 0);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001092 if (self->fp)
Gregory P. Smith73bee442008-04-12 20:37:48 +00001093 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001094 if (bzerror != BZ_OK) {
1095 Util_CatchBZ2Error(bzerror);
1096 goto cleanup;
1097 }
1098 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001099 }
1100
Georg Brandl47fab922006-02-18 21:57:25 +00001101 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001102 goto exit;
1103
1104 /* Before getting here, offset must be set to the number of bytes
1105 * to walk forward. */
1106 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001107 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001108 readsize = buffersize;
1109 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001110 /* offset might be wider that readsize, but the result
1111 * of the subtraction is bound by buffersize (see the
1112 * condition above). buffersize is 8192. */
1113 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001114 Py_BEGIN_ALLOW_THREADS
1115 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1116 buffer, readsize, self);
1117 self->pos += chunksize;
1118 Py_END_ALLOW_THREADS
1119 bytesread += chunksize;
1120 if (bzerror == BZ_STREAM_END) {
1121 self->size = self->pos;
1122 self->mode = MODE_READ_EOF;
1123 break;
1124 } else if (bzerror != BZ_OK) {
1125 Util_CatchBZ2Error(bzerror);
1126 goto cleanup;
1127 }
1128 if (bytesread == offset)
1129 break;
1130 }
1131
1132exit:
1133 Py_INCREF(Py_None);
1134 ret = Py_None;
1135
1136cleanup:
1137 RELEASE_LOCK(self);
1138 return ret;
1139}
1140
1141PyDoc_STRVAR(BZ2File_tell__doc__,
1142"tell() -> int\n\
1143\n\
1144Return the current file position, an integer (may be a long integer).\n\
1145");
1146
1147static PyObject *
1148BZ2File_tell(BZ2FileObject *self, PyObject *args)
1149{
1150 PyObject *ret = NULL;
1151
1152 if (self->mode == MODE_CLOSED) {
1153 PyErr_SetString(PyExc_ValueError,
1154 "I/O operation on closed file");
1155 goto cleanup;
1156 }
1157
Georg Brandla8bcecc2005-09-03 07:49:53 +00001158#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001159 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001160#else
1161 ret = PyLong_FromLongLong(self->pos);
1162#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163
1164cleanup:
1165 return ret;
1166}
1167
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001168PyDoc_STRVAR(BZ2File_close__doc__,
1169"close() -> None or (perhaps) an integer\n\
1170\n\
1171Close the file. Sets data attribute .closed to true. A closed file\n\
1172cannot be used for further I/O operations. close() may be called more\n\
1173than once without error.\n\
1174");
1175
1176static PyObject *
1177BZ2File_close(BZ2FileObject *self)
1178{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001179 PyObject *ret = NULL;
1180 int bzerror = BZ_OK;
1181
1182 ACQUIRE_LOCK(self);
1183 switch (self->mode) {
1184 case MODE_READ:
1185 case MODE_READ_EOF:
1186 BZ2_bzReadClose(&bzerror, self->fp);
1187 break;
1188 case MODE_WRITE:
1189 BZ2_bzWriteClose(&bzerror, self->fp,
1190 0, NULL, NULL);
1191 break;
1192 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001193 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001194 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001195 self->fp = NULL;
1196 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001197 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001198 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001199 if (bzerror != BZ_OK) {
1200 Util_CatchBZ2Error(bzerror);
1201 Py_XDECREF(ret);
1202 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001203 }
1204
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001205 RELEASE_LOCK(self);
1206 return ret;
1207}
1208
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001209PyDoc_STRVAR(BZ2File_enter_doc,
1210"__enter__() -> self.");
1211
1212static PyObject *
1213BZ2File_enter(BZ2FileObject *self)
1214{
1215 if (self->mode == MODE_CLOSED) {
1216 PyErr_SetString(PyExc_ValueError,
1217 "I/O operation on closed file");
1218 return NULL;
1219 }
1220 Py_INCREF(self);
1221 return (PyObject *) self;
1222}
1223
1224PyDoc_STRVAR(BZ2File_exit_doc,
1225"__exit__(*excinfo) -> None. Closes the file.");
1226
1227static PyObject *
1228BZ2File_exit(BZ2FileObject *self, PyObject *args)
1229{
1230 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1231 if (!ret)
1232 /* If error occurred, pass through */
1233 return NULL;
1234 Py_DECREF(ret);
1235 Py_RETURN_NONE;
1236}
1237
1238
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001239static PyObject *BZ2File_getiter(BZ2FileObject *self);
1240
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001241static PyMethodDef BZ2File_methods[] = {
1242 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1243 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1244 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001245 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001246 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1247 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1248 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1249 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001250 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
Antoine Pitroub74fc2b2009-01-10 16:13:45 +00001251 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1252 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001253 {NULL, NULL} /* sentinel */
1254};
1255
1256
1257/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001258/* Getters and setters of BZ2File. */
1259
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001260/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1261static PyObject *
1262BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1263{
1264 switch (self->f_newlinetypes) {
1265 case NEWLINE_UNKNOWN:
1266 Py_INCREF(Py_None);
1267 return Py_None;
1268 case NEWLINE_CR:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001269 return PyString_FromString("\r");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001270 case NEWLINE_LF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001271 return PyString_FromString("\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001272 case NEWLINE_CR|NEWLINE_LF:
1273 return Py_BuildValue("(ss)", "\r", "\n");
1274 case NEWLINE_CRLF:
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001275 return PyString_FromString("\r\n");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001276 case NEWLINE_CR|NEWLINE_CRLF:
1277 return Py_BuildValue("(ss)", "\r", "\r\n");
1278 case NEWLINE_LF|NEWLINE_CRLF:
1279 return Py_BuildValue("(ss)", "\n", "\r\n");
1280 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1281 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1282 default:
1283 PyErr_Format(PyExc_SystemError,
1284 "Unknown newlines value 0x%x\n",
1285 self->f_newlinetypes);
1286 return NULL;
1287 }
1288}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001289
1290static PyObject *
1291BZ2File_get_closed(BZ2FileObject *self, void *closure)
1292{
1293 return PyInt_FromLong(self->mode == MODE_CLOSED);
1294}
1295
1296static PyObject *
1297BZ2File_get_mode(BZ2FileObject *self, void *closure)
1298{
1299 return PyObject_GetAttrString(self->file, "mode");
1300}
1301
1302static PyObject *
1303BZ2File_get_name(BZ2FileObject *self, void *closure)
1304{
1305 return PyObject_GetAttrString(self->file, "name");
1306}
1307
1308static PyGetSetDef BZ2File_getset[] = {
1309 {"closed", (getter)BZ2File_get_closed, NULL,
1310 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001311 {"newlines", (getter)BZ2File_get_newlines, NULL,
1312 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001313 {"mode", (getter)BZ2File_get_mode, NULL,
1314 "file mode ('r', 'w', or 'U')"},
1315 {"name", (getter)BZ2File_get_name, NULL,
1316 "file name"},
1317 {NULL} /* Sentinel */
1318};
1319
1320
1321/* ===================================================================== */
1322/* Members of BZ2File_Type. */
1323
1324#undef OFF
1325#define OFF(x) offsetof(BZ2FileObject, x)
1326
1327static PyMemberDef BZ2File_members[] = {
1328 {"softspace", T_INT, OFF(f_softspace), 0,
1329 "flag indicating that a space needs to be printed; used by print"},
1330 {NULL} /* Sentinel */
1331};
1332
1333/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001334/* Slot definitions for BZ2File_Type. */
1335
1336static int
1337BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1338{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001339 static char *kwlist[] = {"filename", "mode", "buffering",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001340 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001341 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001342 char *mode = "r";
1343 int buffering = -1;
1344 int compresslevel = 9;
1345 int bzerror;
1346 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001347
1348 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001349
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001350 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1351 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001352 &compresslevel))
1353 return -1;
1354
1355 if (compresslevel < 1 || compresslevel > 9) {
1356 PyErr_SetString(PyExc_ValueError,
1357 "compresslevel must be between 1 and 9");
1358 return -1;
1359 }
1360
1361 for (;;) {
1362 int error = 0;
1363 switch (*mode) {
1364 case 'r':
1365 case 'w':
1366 if (mode_char)
1367 error = 1;
1368 mode_char = *mode;
1369 break;
1370
1371 case 'b':
1372 break;
1373
1374 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001375#ifdef __VMS
1376 self->f_univ_newline = 0;
1377#else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001378 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001379#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001380 break;
1381
1382 default:
1383 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001384 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001385 }
1386 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001387 PyErr_Format(PyExc_ValueError,
1388 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001389 return -1;
1390 }
1391 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001392 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001393 break;
1394 }
1395
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001396 if (mode_char == 0) {
1397 mode_char = 'r';
1398 }
1399
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001400 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001401
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001402 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1403 name, mode, buffering);
1404 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001405 return -1;
1406
1407 /* From now on, we have stuff to dealloc, so jump to error label
1408 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001409
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001410#ifdef WITH_THREAD
1411 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001412 if (!self->lock) {
1413 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001414 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001415 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001416#endif
1417
1418 if (mode_char == 'r')
1419 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001420 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001421 0, 0, NULL, 0);
1422 else
1423 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001424 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001425 compresslevel, 0, 0);
1426
1427 if (bzerror != BZ_OK) {
1428 Util_CatchBZ2Error(bzerror);
1429 goto error;
1430 }
Gregory P. Smith73bee442008-04-12 20:37:48 +00001431 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001432
1433 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1434
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001435 return 0;
1436
1437error:
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001438 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001439#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001440 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001441 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001442 self->lock = NULL;
1443 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001444#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001445 return -1;
1446}
1447
1448static void
1449BZ2File_dealloc(BZ2FileObject *self)
1450{
1451 int bzerror;
1452#ifdef WITH_THREAD
1453 if (self->lock)
1454 PyThread_free_lock(self->lock);
1455#endif
1456 switch (self->mode) {
1457 case MODE_READ:
1458 case MODE_READ_EOF:
1459 BZ2_bzReadClose(&bzerror, self->fp);
1460 break;
1461 case MODE_WRITE:
1462 BZ2_bzWriteClose(&bzerror, self->fp,
1463 0, NULL, NULL);
1464 break;
1465 }
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001466 if (self->fp) {
Gregory P. Smith73bee442008-04-12 20:37:48 +00001467 PyFile_DecUseCount((PyFileObject *)self->file);
Gregory P. Smithc20adf82008-04-07 06:33:21 +00001468 self->fp = NULL;
1469 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001470 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001471 Py_XDECREF(self->file);
Christian Heimese93237d2007-12-19 02:37:44 +00001472 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001473}
1474
1475/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1476static PyObject *
1477BZ2File_getiter(BZ2FileObject *self)
1478{
1479 if (self->mode == MODE_CLOSED) {
1480 PyErr_SetString(PyExc_ValueError,
1481 "I/O operation on closed file");
1482 return NULL;
1483 }
1484 Py_INCREF((PyObject*)self);
1485 return (PyObject *)self;
1486}
1487
1488/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1489#define READAHEAD_BUFSIZE 8192
1490static PyObject *
1491BZ2File_iternext(BZ2FileObject *self)
1492{
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001493 PyStringObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001494 ACQUIRE_LOCK(self);
1495 if (self->mode == MODE_CLOSED) {
Gregory P. Smith3b1e6b22008-07-07 04:31:58 +00001496 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001497 PyErr_SetString(PyExc_ValueError,
1498 "I/O operation on closed file");
1499 return NULL;
1500 }
1501 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1502 RELEASE_LOCK(self);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001503 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001504 Py_XDECREF(ret);
1505 return NULL;
1506 }
1507 return (PyObject *)ret;
1508}
1509
1510/* ===================================================================== */
1511/* BZ2File_Type definition. */
1512
1513PyDoc_VAR(BZ2File__doc__) =
1514PyDoc_STR(
1515"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1516\n\
1517Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1518writing. When opened for writing, the file will be created if it doesn't\n\
1519exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1520unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1521is given, must be a number between 1 and 9.\n\
1522")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001523PyDoc_STR(
1524"\n\
1525Add a 'U' to mode to open the file for input with universal newline\n\
1526support. Any line ending in the input file will be seen as a '\\n' in\n\
1527Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1528for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1529'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1530newlines are available only when reading.\n\
1531")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001532;
1533
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001534static PyTypeObject BZ2File_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001535 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001536 "bz2.BZ2File", /*tp_name*/
1537 sizeof(BZ2FileObject), /*tp_basicsize*/
1538 0, /*tp_itemsize*/
1539 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1540 0, /*tp_print*/
1541 0, /*tp_getattr*/
1542 0, /*tp_setattr*/
1543 0, /*tp_compare*/
1544 0, /*tp_repr*/
1545 0, /*tp_as_number*/
1546 0, /*tp_as_sequence*/
1547 0, /*tp_as_mapping*/
1548 0, /*tp_hash*/
1549 0, /*tp_call*/
1550 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001551 PyObject_GenericGetAttr,/*tp_getattro*/
1552 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001553 0, /*tp_as_buffer*/
1554 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1555 BZ2File__doc__, /*tp_doc*/
1556 0, /*tp_traverse*/
1557 0, /*tp_clear*/
1558 0, /*tp_richcompare*/
1559 0, /*tp_weaklistoffset*/
1560 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1561 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1562 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001563 BZ2File_members, /*tp_members*/
1564 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001565 0, /*tp_base*/
1566 0, /*tp_dict*/
1567 0, /*tp_descr_get*/
1568 0, /*tp_descr_set*/
1569 0, /*tp_dictoffset*/
1570 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001571 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001572 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001573 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001574 0, /*tp_is_gc*/
1575};
1576
1577
1578/* ===================================================================== */
1579/* Methods of BZ2Comp. */
1580
1581PyDoc_STRVAR(BZ2Comp_compress__doc__,
1582"compress(data) -> string\n\
1583\n\
1584Provide more data to the compressor object. It will return chunks of\n\
1585compressed data whenever possible. When you've finished providing data\n\
1586to compress, call the flush() method to finish the compression process,\n\
1587and return what is left in the internal buffers.\n\
1588");
1589
1590static PyObject *
1591BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1592{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001593 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001594 char *data;
1595 int datasize;
1596 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001597 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001598 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001599 bz_stream *bzs = &self->bzs;
1600 int bzerror;
1601
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001602 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001603 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001604 data = pdata.buf;
1605 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001606
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001607 if (datasize == 0) {
1608 PyBuffer_Release(&pdata);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001609 return PyString_FromString("");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001610 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001611
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001612 ACQUIRE_LOCK(self);
1613 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001614 PyErr_SetString(PyExc_ValueError,
1615 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001616 goto error;
1617 }
1618
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001619 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001620 if (!ret)
1621 goto error;
1622
1623 bzs->next_in = data;
1624 bzs->avail_in = datasize;
1625 bzs->next_out = BUF(ret);
1626 bzs->avail_out = bufsize;
1627
1628 totalout = BZS_TOTAL_OUT(bzs);
1629
1630 for (;;) {
1631 Py_BEGIN_ALLOW_THREADS
1632 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1633 Py_END_ALLOW_THREADS
1634 if (bzerror != BZ_RUN_OK) {
1635 Util_CatchBZ2Error(bzerror);
1636 goto error;
1637 }
Georg Brandla47337f2007-03-13 12:34:25 +00001638 if (bzs->avail_in == 0)
1639 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001640 if (bzs->avail_out == 0) {
1641 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001642 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001643 BZ2_bzCompressEnd(bzs);
1644 goto error;
1645 }
1646 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1647 - totalout);
1648 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001649 }
1650 }
1651
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001652 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001653
1654 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001655 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001656 return ret;
1657
1658error:
1659 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001660 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001661 Py_XDECREF(ret);
1662 return NULL;
1663}
1664
1665PyDoc_STRVAR(BZ2Comp_flush__doc__,
1666"flush() -> string\n\
1667\n\
1668Finish the compression process and return what is left in internal buffers.\n\
1669You must not use the compressor object after calling this method.\n\
1670");
1671
1672static PyObject *
1673BZ2Comp_flush(BZ2CompObject *self)
1674{
1675 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001676 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001677 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001678 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001679 int bzerror;
1680
1681 ACQUIRE_LOCK(self);
1682 if (!self->running) {
1683 PyErr_SetString(PyExc_ValueError, "object was already "
1684 "flushed");
1685 goto error;
1686 }
1687 self->running = 0;
1688
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001689 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001690 if (!ret)
1691 goto error;
1692
1693 bzs->next_out = BUF(ret);
1694 bzs->avail_out = bufsize;
1695
1696 totalout = BZS_TOTAL_OUT(bzs);
1697
1698 for (;;) {
1699 Py_BEGIN_ALLOW_THREADS
1700 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1701 Py_END_ALLOW_THREADS
1702 if (bzerror == BZ_STREAM_END) {
1703 break;
1704 } else if (bzerror != BZ_FINISH_OK) {
1705 Util_CatchBZ2Error(bzerror);
1706 goto error;
1707 }
1708 if (bzs->avail_out == 0) {
1709 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001710 if (_PyString_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001711 goto error;
1712 bzs->next_out = BUF(ret);
1713 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1714 - totalout);
1715 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1716 }
1717 }
1718
1719 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001720 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001721
1722 RELEASE_LOCK(self);
1723 return ret;
1724
1725error:
1726 RELEASE_LOCK(self);
1727 Py_XDECREF(ret);
1728 return NULL;
1729}
1730
1731static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001732 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1733 BZ2Comp_compress__doc__},
1734 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1735 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001736 {NULL, NULL} /* sentinel */
1737};
1738
1739
1740/* ===================================================================== */
1741/* Slot definitions for BZ2Comp_Type. */
1742
1743static int
1744BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1745{
1746 int compresslevel = 9;
1747 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001748 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001749
1750 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1751 kwlist, &compresslevel))
1752 return -1;
1753
1754 if (compresslevel < 1 || compresslevel > 9) {
1755 PyErr_SetString(PyExc_ValueError,
1756 "compresslevel must be between 1 and 9");
1757 goto error;
1758 }
1759
1760#ifdef WITH_THREAD
1761 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001762 if (!self->lock) {
1763 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001764 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001765 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001766#endif
1767
1768 memset(&self->bzs, 0, sizeof(bz_stream));
1769 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1770 if (bzerror != BZ_OK) {
1771 Util_CatchBZ2Error(bzerror);
1772 goto error;
1773 }
1774
1775 self->running = 1;
1776
1777 return 0;
1778error:
1779#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001780 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001781 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001782 self->lock = NULL;
1783 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001784#endif
1785 return -1;
1786}
1787
1788static void
1789BZ2Comp_dealloc(BZ2CompObject *self)
1790{
1791#ifdef WITH_THREAD
1792 if (self->lock)
1793 PyThread_free_lock(self->lock);
1794#endif
1795 BZ2_bzCompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00001796 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001797}
1798
1799
1800/* ===================================================================== */
1801/* BZ2Comp_Type definition. */
1802
1803PyDoc_STRVAR(BZ2Comp__doc__,
1804"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1805\n\
1806Create a new compressor object. This object may be used to compress\n\
1807data sequentially. If you want to compress data in one shot, use the\n\
1808compress() function instead. The compresslevel parameter, if given,\n\
1809must be a number between 1 and 9.\n\
1810");
1811
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001812static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001813 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001814 "bz2.BZ2Compressor", /*tp_name*/
1815 sizeof(BZ2CompObject), /*tp_basicsize*/
1816 0, /*tp_itemsize*/
1817 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1818 0, /*tp_print*/
1819 0, /*tp_getattr*/
1820 0, /*tp_setattr*/
1821 0, /*tp_compare*/
1822 0, /*tp_repr*/
1823 0, /*tp_as_number*/
1824 0, /*tp_as_sequence*/
1825 0, /*tp_as_mapping*/
1826 0, /*tp_hash*/
1827 0, /*tp_call*/
1828 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001829 PyObject_GenericGetAttr,/*tp_getattro*/
1830 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001831 0, /*tp_as_buffer*/
1832 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1833 BZ2Comp__doc__, /*tp_doc*/
1834 0, /*tp_traverse*/
1835 0, /*tp_clear*/
1836 0, /*tp_richcompare*/
1837 0, /*tp_weaklistoffset*/
1838 0, /*tp_iter*/
1839 0, /*tp_iternext*/
1840 BZ2Comp_methods, /*tp_methods*/
1841 0, /*tp_members*/
1842 0, /*tp_getset*/
1843 0, /*tp_base*/
1844 0, /*tp_dict*/
1845 0, /*tp_descr_get*/
1846 0, /*tp_descr_set*/
1847 0, /*tp_dictoffset*/
1848 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001849 PyType_GenericAlloc, /*tp_alloc*/
1850 PyType_GenericNew, /*tp_new*/
1851 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001852 0, /*tp_is_gc*/
1853};
1854
1855
1856/* ===================================================================== */
1857/* Members of BZ2Decomp. */
1858
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001859#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001860#define OFF(x) offsetof(BZ2DecompObject, x)
1861
1862static PyMemberDef BZ2Decomp_members[] = {
1863 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1864 {NULL} /* Sentinel */
1865};
1866
1867
1868/* ===================================================================== */
1869/* Methods of BZ2Decomp. */
1870
1871PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1872"decompress(data) -> string\n\
1873\n\
1874Provide more data to the decompressor object. It will return chunks\n\
1875of decompressed data whenever possible. If you try to decompress data\n\
1876after the end of stream is found, EOFError will be raised. If any data\n\
1877was found after the end of stream, it'll be ignored and saved in\n\
1878unused_data attribute.\n\
1879");
1880
1881static PyObject *
1882BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1883{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001884 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001885 char *data;
1886 int datasize;
1887 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001888 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001889 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001890 bz_stream *bzs = &self->bzs;
1891 int bzerror;
1892
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001893 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001894 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001895 data = pdata.buf;
1896 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001897
1898 ACQUIRE_LOCK(self);
1899 if (!self->running) {
1900 PyErr_SetString(PyExc_EOFError, "end of stream was "
1901 "already found");
1902 goto error;
1903 }
1904
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001905 ret = PyString_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001906 if (!ret)
1907 goto error;
1908
1909 bzs->next_in = data;
1910 bzs->avail_in = datasize;
1911 bzs->next_out = BUF(ret);
1912 bzs->avail_out = bufsize;
1913
1914 totalout = BZS_TOTAL_OUT(bzs);
1915
1916 for (;;) {
1917 Py_BEGIN_ALLOW_THREADS
1918 bzerror = BZ2_bzDecompress(bzs);
1919 Py_END_ALLOW_THREADS
1920 if (bzerror == BZ_STREAM_END) {
1921 if (bzs->avail_in != 0) {
1922 Py_DECREF(self->unused_data);
1923 self->unused_data =
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001924 PyString_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001925 bzs->avail_in);
1926 }
1927 self->running = 0;
1928 break;
1929 }
1930 if (bzerror != BZ_OK) {
1931 Util_CatchBZ2Error(bzerror);
1932 goto error;
1933 }
Georg Brandla47337f2007-03-13 12:34:25 +00001934 if (bzs->avail_in == 0)
1935 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001936 if (bzs->avail_out == 0) {
1937 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001938 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001939 BZ2_bzDecompressEnd(bzs);
1940 goto error;
1941 }
1942 bzs->next_out = BUF(ret);
1943 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1944 - totalout);
1945 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001946 }
1947 }
1948
1949 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001950 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001951
1952 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001953 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001954 return ret;
1955
1956error:
1957 RELEASE_LOCK(self);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001958 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001959 Py_XDECREF(ret);
1960 return NULL;
1961}
1962
1963static PyMethodDef BZ2Decomp_methods[] = {
1964 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1965 {NULL, NULL} /* sentinel */
1966};
1967
1968
1969/* ===================================================================== */
1970/* Slot definitions for BZ2Decomp_Type. */
1971
1972static int
1973BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1974{
1975 int bzerror;
1976
1977 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1978 return -1;
1979
1980#ifdef WITH_THREAD
1981 self->lock = PyThread_allocate_lock();
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001982 if (!self->lock) {
1983 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001984 goto error;
Neal Norwitzb59d08c2006-07-22 16:20:49 +00001985 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001986#endif
1987
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001988 self->unused_data = PyString_FromString("");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001989 if (!self->unused_data)
1990 goto error;
1991
1992 memset(&self->bzs, 0, sizeof(bz_stream));
1993 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1994 if (bzerror != BZ_OK) {
1995 Util_CatchBZ2Error(bzerror);
1996 goto error;
1997 }
1998
1999 self->running = 1;
2000
2001 return 0;
2002
2003error:
2004#ifdef WITH_THREAD
Neal Norwitzb59d08c2006-07-22 16:20:49 +00002005 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002006 PyThread_free_lock(self->lock);
Neal Norwitzb59d08c2006-07-22 16:20:49 +00002007 self->lock = NULL;
2008 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002009#endif
Neal Norwitzb59d08c2006-07-22 16:20:49 +00002010 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002011 return -1;
2012}
2013
2014static void
2015BZ2Decomp_dealloc(BZ2DecompObject *self)
2016{
2017#ifdef WITH_THREAD
2018 if (self->lock)
2019 PyThread_free_lock(self->lock);
2020#endif
2021 Py_XDECREF(self->unused_data);
2022 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimese93237d2007-12-19 02:37:44 +00002023 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002024}
2025
2026
2027/* ===================================================================== */
2028/* BZ2Decomp_Type definition. */
2029
2030PyDoc_STRVAR(BZ2Decomp__doc__,
2031"BZ2Decompressor() -> decompressor object\n\
2032\n\
2033Create a new decompressor object. This object may be used to decompress\n\
2034data sequentially. If you want to decompress data in one shot, use the\n\
2035decompress() function instead.\n\
2036");
2037
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00002038static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00002039 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002040 "bz2.BZ2Decompressor", /*tp_name*/
2041 sizeof(BZ2DecompObject), /*tp_basicsize*/
2042 0, /*tp_itemsize*/
2043 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2044 0, /*tp_print*/
2045 0, /*tp_getattr*/
2046 0, /*tp_setattr*/
2047 0, /*tp_compare*/
2048 0, /*tp_repr*/
2049 0, /*tp_as_number*/
2050 0, /*tp_as_sequence*/
2051 0, /*tp_as_mapping*/
2052 0, /*tp_hash*/
2053 0, /*tp_call*/
2054 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002055 PyObject_GenericGetAttr,/*tp_getattro*/
2056 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002057 0, /*tp_as_buffer*/
2058 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2059 BZ2Decomp__doc__, /*tp_doc*/
2060 0, /*tp_traverse*/
2061 0, /*tp_clear*/
2062 0, /*tp_richcompare*/
2063 0, /*tp_weaklistoffset*/
2064 0, /*tp_iter*/
2065 0, /*tp_iternext*/
2066 BZ2Decomp_methods, /*tp_methods*/
2067 BZ2Decomp_members, /*tp_members*/
2068 0, /*tp_getset*/
2069 0, /*tp_base*/
2070 0, /*tp_dict*/
2071 0, /*tp_descr_get*/
2072 0, /*tp_descr_set*/
2073 0, /*tp_dictoffset*/
2074 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00002075 PyType_GenericAlloc, /*tp_alloc*/
2076 PyType_GenericNew, /*tp_new*/
2077 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002078 0, /*tp_is_gc*/
2079};
2080
2081
2082/* ===================================================================== */
2083/* Module functions. */
2084
2085PyDoc_STRVAR(bz2_compress__doc__,
2086"compress(data [, compresslevel=9]) -> string\n\
2087\n\
2088Compress data in one shot. If you want to compress data sequentially,\n\
2089use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2090given, must be a number between 1 and 9.\n\
2091");
2092
2093static PyObject *
2094bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2095{
2096 int compresslevel=9;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002097 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002098 char *data;
2099 int datasize;
2100 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002101 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002102 bz_stream _bzs;
2103 bz_stream *bzs = &_bzs;
2104 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002105 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002106
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002107 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2108 kwlist, &pdata,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002109 &compresslevel))
2110 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002111 data = pdata.buf;
2112 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002113
2114 if (compresslevel < 1 || compresslevel > 9) {
2115 PyErr_SetString(PyExc_ValueError,
2116 "compresslevel must be between 1 and 9");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002117 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002118 return NULL;
2119 }
2120
2121 /* Conforming to bz2 manual, this is large enough to fit compressed
2122 * data in one shot. We will check it later anyway. */
2123 bufsize = datasize + (datasize/100+1) + 600;
2124
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002125 ret = PyString_FromStringAndSize(NULL, bufsize);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002126 if (!ret) {
2127 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002128 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002129 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002130
2131 memset(bzs, 0, sizeof(bz_stream));
2132
2133 bzs->next_in = data;
2134 bzs->avail_in = datasize;
2135 bzs->next_out = BUF(ret);
2136 bzs->avail_out = bufsize;
2137
2138 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2139 if (bzerror != BZ_OK) {
2140 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002141 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002142 Py_DECREF(ret);
2143 return NULL;
2144 }
Tim Peterse3228092002-11-09 04:21:44 +00002145
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002146 for (;;) {
2147 Py_BEGIN_ALLOW_THREADS
2148 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2149 Py_END_ALLOW_THREADS
2150 if (bzerror == BZ_STREAM_END) {
2151 break;
2152 } else if (bzerror != BZ_FINISH_OK) {
2153 BZ2_bzCompressEnd(bzs);
2154 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002155 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002156 Py_DECREF(ret);
2157 return NULL;
2158 }
2159 if (bzs->avail_out == 0) {
2160 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002161 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002162 BZ2_bzCompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002163 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002164 Py_DECREF(ret);
2165 return NULL;
2166 }
2167 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2168 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2169 }
2170 }
2171
2172 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002173 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002174 BZ2_bzCompressEnd(bzs);
2175
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002176 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002177 return ret;
2178}
2179
2180PyDoc_STRVAR(bz2_decompress__doc__,
2181"decompress(data) -> decompressed data\n\
2182\n\
2183Decompress data in one shot. If you want to decompress data sequentially,\n\
2184use an instance of BZ2Decompressor instead.\n\
2185");
2186
2187static PyObject *
2188bz2_decompress(PyObject *self, PyObject *args)
2189{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002190 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002191 char *data;
2192 int datasize;
2193 int bufsize = SMALLCHUNK;
2194 PyObject *ret;
2195 bz_stream _bzs;
2196 bz_stream *bzs = &_bzs;
2197 int bzerror;
2198
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002199 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002200 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002201 data = pdata.buf;
2202 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002203
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002204 if (datasize == 0) {
2205 PyBuffer_Release(&pdata);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002206 return PyString_FromString("");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002207 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002208
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002209 ret = PyString_FromStringAndSize(NULL, bufsize);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002210 if (!ret) {
2211 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002212 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002213 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002214
2215 memset(bzs, 0, sizeof(bz_stream));
2216
2217 bzs->next_in = data;
2218 bzs->avail_in = datasize;
2219 bzs->next_out = BUF(ret);
2220 bzs->avail_out = bufsize;
2221
2222 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2223 if (bzerror != BZ_OK) {
2224 Util_CatchBZ2Error(bzerror);
2225 Py_DECREF(ret);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002226 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002227 return NULL;
2228 }
Tim Peterse3228092002-11-09 04:21:44 +00002229
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002230 for (;;) {
2231 Py_BEGIN_ALLOW_THREADS
2232 bzerror = BZ2_bzDecompress(bzs);
2233 Py_END_ALLOW_THREADS
2234 if (bzerror == BZ_STREAM_END) {
2235 break;
2236 } else if (bzerror != BZ_OK) {
2237 BZ2_bzDecompressEnd(bzs);
2238 Util_CatchBZ2Error(bzerror);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002239 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002240 Py_DECREF(ret);
2241 return NULL;
2242 }
Georg Brandla47337f2007-03-13 12:34:25 +00002243 if (bzs->avail_in == 0) {
2244 BZ2_bzDecompressEnd(bzs);
2245 PyErr_SetString(PyExc_ValueError,
2246 "couldn't find end of stream");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002247 PyBuffer_Release(&pdata);
Georg Brandla47337f2007-03-13 12:34:25 +00002248 Py_DECREF(ret);
2249 return NULL;
2250 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002251 if (bzs->avail_out == 0) {
2252 bufsize = Util_NewBufferSize(bufsize);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002253 if (_PyString_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002254 BZ2_bzDecompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002255 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002256 Py_DECREF(ret);
2257 return NULL;
2258 }
2259 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2260 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002261 }
2262 }
2263
2264 if (bzs->avail_out != 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002265 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002266 BZ2_bzDecompressEnd(bzs);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002267 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002268
2269 return ret;
2270}
2271
2272static PyMethodDef bz2_methods[] = {
2273 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2274 bz2_compress__doc__},
2275 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2276 bz2_decompress__doc__},
2277 {NULL, NULL} /* sentinel */
2278};
2279
2280/* ===================================================================== */
2281/* Initialization function. */
2282
2283PyDoc_STRVAR(bz2__doc__,
2284"The python bz2 module provides a comprehensive interface for\n\
2285the bz2 compression library. It implements a complete file\n\
2286interface, one shot (de)compression functions, and types for\n\
2287sequential (de)compression.\n\
2288");
2289
Neal Norwitz21d896c2003-07-01 20:15:21 +00002290PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002291initbz2(void)
2292{
2293 PyObject *m;
2294
Christian Heimese93237d2007-12-19 02:37:44 +00002295 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2296 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2297 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002298
2299 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002300 if (m == NULL)
2301 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002302
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002303 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002304
2305 Py_INCREF(&BZ2File_Type);
2306 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2307
2308 Py_INCREF(&BZ2Comp_Type);
2309 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2310
2311 Py_INCREF(&BZ2Decomp_Type);
2312 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2313}