blob: cfd8d71b7e9031099715ab1c4e4c183e57572fba [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t: this module's off_t-like type, 64-bit when the platform can
 * provide one.  The selection logic is copied from Objects/fileobject.c:
 * off_t is used when large-file support is off or off_t is already wide
 * enough, fpos_t is the fallback, and anything else is a build error. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character buffer of a string object.  The argument is parenthesized so
 * that an expression argument is cast as a whole. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2   /* set by the readers once BZ_STREAM_END is seen */
#define MODE_WRITE    3

/* Exact type test against BZ2File_Type. */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZS_TOTAL_OUT(bzs): total number of output bytes recorded in the
 * bz_stream that `bzs` points to.
 *
 * When bzlib.h defines BZ_CONFIG_ERROR the counter is split into
 * total_out_hi32/total_out_lo32 and is recombined into the widest integer
 * type available; without a 64-bit type only the low word is reported.
 * When BZ_CONFIG_ERROR is absent, the BZ2_-prefixed function names used in
 * this file are mapped onto unprefixed ones and the stream exposes a single
 * total_out field.
 *
 * The macro argument is parenthesized so that any pointer expression
 * (e.g. `p + 1`) can be passed safely. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers.
 *
 * ACQUIRE_LOCK first tries to take obj->lock without waiting (waitflag 0).
 * Only if that fails does it wait for the lock (waitflag 1), and it does so
 * inside a Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS section.
 * RELEASE_LOCK releases obj->lock.  Without WITH_THREAD both expand to
 * nothing.
 *
 * The macro argument is parenthesized so any pointer expression is safe. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bit flags accumulated in f_newlinetypes */
#define NEWLINE_UNKNOWN 0x0     /* no newline seen so far */
#define NEWLINE_CR      0x1     /* a lone \r was seen */
#define NEWLINE_LF      0x2     /* a lone \n was seen */
#define NEWLINE_CRLF    0x4     /* a \r\n pair was seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000103 PyObject_HEAD
104 PyObject *file;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000110 int f_softspace; /* Flag used by 'print' command */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000111
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000112 int f_univ_newline; /* Handle any newline convention */
113 int f_newlinetypes; /* Types of newlines seen */
114 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000115
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000116 BZFILE *fp;
117 int mode;
118 Py_off_t pos;
119 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000120#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000121 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000122#endif
123} BZ2FileObject;
124
125typedef struct {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000126 PyObject_HEAD
127 bz_stream bzs;
128 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000129#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000130 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000131#endif
132} BZ2CompObject;
133
134typedef struct {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000135 PyObject_HEAD
136 bz_stream bzs;
137 int running;
138 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000139#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000140 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000141#endif
142} BZ2DecompObject;
143
144/* ===================================================================== */
145/* Utility functions. */
146
Antoine Pitroucdb63fb2010-08-01 20:16:12 +0000147/* Refuse regular I/O if there's data in the iteration-buffer.
148 * Mixing them would cause data to arrive out of order, as the read*
149 * methods don't use the iteration buffer. */
150static int
151check_iterbuffered(BZ2FileObject *f)
152{
153 if (f->f_buf != NULL &&
154 (f->f_bufend - f->f_bufptr) > 0 &&
155 f->f_buf[0] != '\0') {
156 PyErr_SetString(PyExc_ValueError,
157 "Mixing iteration and read methods would lose data");
158 return -1;
159 }
160 return 0;
161}
162
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000163static int
164Util_CatchBZ2Error(int bzerror)
165{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000166 int ret = 0;
167 switch(bzerror) {
168 case BZ_OK:
169 case BZ_STREAM_END:
170 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000171
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000172#ifdef BZ_CONFIG_ERROR
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000173 case BZ_CONFIG_ERROR:
174 PyErr_SetString(PyExc_SystemError,
175 "the bz2 library was not compiled "
176 "correctly");
177 ret = 1;
178 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000179#endif
Tim Peterse3228092002-11-09 04:21:44 +0000180
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000181 case BZ_PARAM_ERROR:
182 PyErr_SetString(PyExc_ValueError,
183 "the bz2 library has received wrong "
184 "parameters");
185 ret = 1;
186 break;
Tim Peterse3228092002-11-09 04:21:44 +0000187
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000188 case BZ_MEM_ERROR:
189 PyErr_NoMemory();
190 ret = 1;
191 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000192
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000193 case BZ_DATA_ERROR:
194 case BZ_DATA_ERROR_MAGIC:
195 PyErr_SetString(PyExc_IOError, "invalid data stream");
196 ret = 1;
197 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000198
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000199 case BZ_IO_ERROR:
200 PyErr_SetString(PyExc_IOError, "unknown IO error");
201 ret = 1;
202 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000203
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000204 case BZ_UNEXPECTED_EOF:
205 PyErr_SetString(PyExc_EOFError,
206 "compressed file ended before the "
207 "logical end-of-stream was detected");
208 ret = 1;
209 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000210
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000211 case BZ_SEQUENCE_ERROR:
212 PyErr_SetString(PyExc_RuntimeError,
213 "wrong sequence of bz2 library "
214 "commands used");
215 ret = 1;
216 break;
217 }
218 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000219}
220
/* SMALLCHUNK: linear growth step for small buffers (at least 8192). */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK: size up to which buffers double; linear step beyond it. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the next size to grow to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK, up to and including BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
247
248/* This is a hacked version of Python's fileobject.c:get_line(). */
249static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000250Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000252 char c;
253 char *buf, *end;
254 size_t total_v_size; /* total # of slots in buffer */
255 size_t used_v_size; /* # used slots in buffer */
256 size_t increment; /* amount to increment the buffer */
257 PyObject *v;
258 int bzerror;
259 int bytes_read;
260 int newlinetypes = f->f_newlinetypes;
261 int skipnextlf = f->f_skipnextlf;
262 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000263
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000264 total_v_size = n > 0 ? n : 100;
265 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
266 if (v == NULL)
267 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000268
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000269 buf = BUF(v);
270 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000271
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000272 for (;;) {
273 Py_BEGIN_ALLOW_THREADS
274 while (buf != end) {
275 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276 f->pos++;
277 if (bytes_read == 0) break;
278 if (univ_newline) {
279 if (skipnextlf) {
280 skipnextlf = 0;
281 if (c == '\n') {
282 /* Seeing a \n here with skipnextlf true means we
283 * saw a \r before.
284 */
285 newlinetypes |= NEWLINE_CRLF;
286 if (bzerror != BZ_OK) break;
287 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
288 f->pos++;
289 if (bytes_read == 0) break;
290 } else {
291 newlinetypes |= NEWLINE_CR;
292 }
293 }
294 if (c == '\r') {
295 skipnextlf = 1;
296 c = '\n';
297 } else if (c == '\n')
298 newlinetypes |= NEWLINE_LF;
299 }
300 *buf++ = c;
301 if (bzerror != BZ_OK || c == '\n') break;
302 }
303 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
304 newlinetypes |= NEWLINE_CR;
305 Py_END_ALLOW_THREADS
306 f->f_newlinetypes = newlinetypes;
307 f->f_skipnextlf = skipnextlf;
308 if (bzerror == BZ_STREAM_END) {
309 f->size = f->pos;
310 f->mode = MODE_READ_EOF;
311 break;
312 } else if (bzerror != BZ_OK) {
313 Util_CatchBZ2Error(bzerror);
314 Py_DECREF(v);
315 return NULL;
316 }
317 if (c == '\n')
318 break;
319 /* Must be because buf == end */
320 if (n > 0)
321 break;
322 used_v_size = total_v_size;
323 increment = total_v_size >> 2; /* mild exponential growth */
324 total_v_size += increment;
325 if (total_v_size > INT_MAX) {
326 PyErr_SetString(PyExc_OverflowError,
327 "line is longer than a Python string can hold");
328 Py_DECREF(v);
329 return NULL;
330 }
331 if (_PyString_Resize(&v, total_v_size) < 0)
332 return NULL;
333 buf = BUF(v) + used_v_size;
334 end = BUF(v) + total_v_size;
335 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000336
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000337 used_v_size = buf - BUF(v);
338 if (used_v_size != total_v_size)
339 _PyString_Resize(&v, used_v_size);
340 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000341}
342
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000343/* This is a hacked version of Python's
344 * fileobject.c:Py_UniversalNewlineFread(). */
345size_t
346Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000347 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000348{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000349 char *dst = buf;
350 int newlinetypes, skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000351
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000352 assert(buf != NULL);
353 assert(stream != NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000354
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000355 if (!f->f_univ_newline)
356 return BZ2_bzRead(bzerror, stream, buf, n);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000357
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000358 newlinetypes = f->f_newlinetypes;
359 skipnextlf = f->f_skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000360
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000361 /* Invariant: n is the number of bytes remaining to be filled
362 * in the buffer.
363 */
364 while (n) {
365 size_t nread;
366 int shortread;
367 char *src = dst;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000368
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000369 nread = BZ2_bzRead(bzerror, stream, dst, n);
370 assert(nread <= n);
371 n -= nread; /* assuming 1 byte out for each in; will adjust */
372 shortread = n != 0; /* true iff EOF or error */
373 while (nread--) {
374 char c = *src++;
375 if (c == '\r') {
376 /* Save as LF and set flag to skip next LF. */
377 *dst++ = '\n';
378 skipnextlf = 1;
379 }
380 else if (skipnextlf && c == '\n') {
381 /* Skip LF, and remember we saw CR LF. */
382 skipnextlf = 0;
383 newlinetypes |= NEWLINE_CRLF;
384 ++n;
385 }
386 else {
387 /* Normal char to be stored in buffer. Also
388 * update the newlinetypes flag if either this
389 * is an LF or the previous char was a CR.
390 */
391 if (c == '\n')
392 newlinetypes |= NEWLINE_LF;
393 else if (skipnextlf)
394 newlinetypes |= NEWLINE_CR;
395 *dst++ = c;
396 skipnextlf = 0;
397 }
398 }
399 if (shortread) {
400 /* If this is EOF, update type flags. */
401 if (skipnextlf && *bzerror == BZ_STREAM_END)
402 newlinetypes |= NEWLINE_CR;
403 break;
404 }
405 }
406 f->f_newlinetypes = newlinetypes;
407 f->f_skipnextlf = skipnextlf;
408 return dst - buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000410
411/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
412static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000413Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000414{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000415 if (f->f_buf != NULL) {
416 PyMem_Free(f->f_buf);
417 f->f_buf = NULL;
418 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000419}
420
421/* This is a hacked version of Python's fileobject.c:readahead(). */
422static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000423Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000424{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000425 int chunksize;
426 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000428 if (f->f_buf != NULL) {
429 if((f->f_bufend - f->f_bufptr) >= 1)
430 return 0;
431 else
432 Util_DropReadAhead(f);
433 }
434 if (f->mode == MODE_READ_EOF) {
435 f->f_bufptr = f->f_buf;
436 f->f_bufend = f->f_buf;
437 return 0;
438 }
439 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
440 PyErr_NoMemory();
441 return -1;
442 }
443 Py_BEGIN_ALLOW_THREADS
444 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
445 bufsize, f);
446 Py_END_ALLOW_THREADS
447 f->pos += chunksize;
448 if (bzerror == BZ_STREAM_END) {
449 f->size = f->pos;
450 f->mode = MODE_READ_EOF;
451 } else if (bzerror != BZ_OK) {
452 Util_CatchBZ2Error(bzerror);
453 Util_DropReadAhead(f);
454 return -1;
455 }
456 f->f_bufptr = f->f_buf;
457 f->f_bufend = f->f_buf + chunksize;
458 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000459}
460
461/* This is a hacked version of Python's
462 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000463static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000464Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000465{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000466 PyStringObject* s;
467 char *bufptr;
468 char *buf;
469 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000471 if (f->f_buf == NULL)
472 if (Util_ReadAhead(f, bufsize) < 0)
473 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000474
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000475 len = f->f_bufend - f->f_bufptr;
476 if (len == 0)
477 return (PyStringObject *)
478 PyString_FromStringAndSize(NULL, skip);
479 bufptr = memchr(f->f_bufptr, '\n', len);
480 if (bufptr != NULL) {
481 bufptr++; /* Count the '\n' */
482 len = bufptr - f->f_bufptr;
483 s = (PyStringObject *)
484 PyString_FromStringAndSize(NULL, skip+len);
485 if (s == NULL)
486 return NULL;
487 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
488 f->f_bufptr = bufptr;
489 if (bufptr == f->f_bufend)
490 Util_DropReadAhead(f);
491 } else {
492 bufptr = f->f_bufptr;
493 buf = f->f_buf;
494 f->f_buf = NULL; /* Force new readahead buffer */
495 s = Util_ReadAheadGetLineSkip(f, skip+len,
496 bufsize + (bufsize>>2));
497 if (s == NULL) {
498 PyMem_Free(buf);
499 return NULL;
500 }
501 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
502 PyMem_Free(buf);
503 }
504 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000505}
506
507/* ===================================================================== */
508/* Methods of BZ2File. */
509
510PyDoc_STRVAR(BZ2File_read__doc__,
511"read([size]) -> string\n\
512\n\
513Read at most size uncompressed bytes, returned as a string. If the size\n\
514argument is negative or omitted, read until EOF is reached.\n\
515");
516
517/* This is a hacked version of Python's fileobject.c:file_read(). */
518static PyObject *
519BZ2File_read(BZ2FileObject *self, PyObject *args)
520{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000521 long bytesrequested = -1;
522 size_t bytesread, buffersize, chunksize;
523 int bzerror;
524 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000525
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000526 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
527 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000528
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000529 ACQUIRE_LOCK(self);
530 switch (self->mode) {
531 case MODE_READ:
532 break;
533 case MODE_READ_EOF:
534 ret = PyString_FromString("");
535 goto cleanup;
536 case MODE_CLOSED:
537 PyErr_SetString(PyExc_ValueError,
538 "I/O operation on closed file");
539 goto cleanup;
540 default:
541 PyErr_SetString(PyExc_IOError,
542 "file is not ready for reading");
543 goto cleanup;
544 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000545
Antoine Pitroucdb63fb2010-08-01 20:16:12 +0000546 /* refuse to mix with f.next() */
547 if (check_iterbuffered(self))
548 goto cleanup;
549
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000550 if (bytesrequested < 0)
551 buffersize = Util_NewBufferSize((size_t)0);
552 else
553 buffersize = bytesrequested;
554 if (buffersize > INT_MAX) {
555 PyErr_SetString(PyExc_OverflowError,
556 "requested number of bytes is "
557 "more than a Python string can hold");
558 goto cleanup;
559 }
560 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
561 if (ret == NULL)
562 goto cleanup;
563 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000564
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000565 for (;;) {
566 Py_BEGIN_ALLOW_THREADS
567 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
568 BUF(ret)+bytesread,
569 buffersize-bytesread,
570 self);
571 self->pos += chunksize;
572 Py_END_ALLOW_THREADS
573 bytesread += chunksize;
574 if (bzerror == BZ_STREAM_END) {
575 self->size = self->pos;
576 self->mode = MODE_READ_EOF;
577 break;
578 } else if (bzerror != BZ_OK) {
579 Util_CatchBZ2Error(bzerror);
580 Py_DECREF(ret);
581 ret = NULL;
582 goto cleanup;
583 }
584 if (bytesrequested < 0) {
585 buffersize = Util_NewBufferSize(buffersize);
586 if (_PyString_Resize(&ret, buffersize) < 0)
587 goto cleanup;
588 } else {
589 break;
590 }
591 }
592 if (bytesread != buffersize)
593 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000594
595cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000596 RELEASE_LOCK(self);
597 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000598}
599
600PyDoc_STRVAR(BZ2File_readline__doc__,
601"readline([size]) -> string\n\
602\n\
603Return the next line from the file, as a string, retaining newline.\n\
604A non-negative size argument will limit the maximum number of bytes to\n\
605return (an incomplete line may be returned then). Return an empty\n\
606string at EOF.\n\
607");
608
609static PyObject *
610BZ2File_readline(BZ2FileObject *self, PyObject *args)
611{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000612 PyObject *ret = NULL;
613 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000614
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000615 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
616 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000617
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000618 ACQUIRE_LOCK(self);
619 switch (self->mode) {
620 case MODE_READ:
621 break;
622 case MODE_READ_EOF:
623 ret = PyString_FromString("");
624 goto cleanup;
625 case MODE_CLOSED:
626 PyErr_SetString(PyExc_ValueError,
627 "I/O operation on closed file");
628 goto cleanup;
629 default:
630 PyErr_SetString(PyExc_IOError,
631 "file is not ready for reading");
632 goto cleanup;
633 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000634
Antoine Pitroucdb63fb2010-08-01 20:16:12 +0000635 /* refuse to mix with f.next() */
636 if (check_iterbuffered(self))
637 goto cleanup;
638
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000639 if (sizehint == 0)
640 ret = PyString_FromString("");
641 else
642 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000643
644cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000645 RELEASE_LOCK(self);
646 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000647}
648
649PyDoc_STRVAR(BZ2File_readlines__doc__,
650"readlines([size]) -> list\n\
651\n\
652Call readline() repeatedly and return a list of lines read.\n\
653The optional size argument, if given, is an approximate bound on the\n\
654total number of bytes in the lines returned.\n\
655");
656
657/* This is a hacked version of Python's fileobject.c:file_readlines(). */
658static PyObject *
659BZ2File_readlines(BZ2FileObject *self, PyObject *args)
660{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000661 long sizehint = 0;
662 PyObject *list = NULL;
663 PyObject *line;
664 char small_buffer[SMALLCHUNK];
665 char *buffer = small_buffer;
666 size_t buffersize = SMALLCHUNK;
667 PyObject *big_buffer = NULL;
668 size_t nfilled = 0;
669 size_t nread;
670 size_t totalread = 0;
671 char *p, *q, *end;
672 int err;
673 int shortread = 0;
674 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000675
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000676 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
677 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000678
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000679 ACQUIRE_LOCK(self);
680 switch (self->mode) {
681 case MODE_READ:
682 break;
683 case MODE_READ_EOF:
684 list = PyList_New(0);
685 goto cleanup;
686 case MODE_CLOSED:
687 PyErr_SetString(PyExc_ValueError,
688 "I/O operation on closed file");
689 goto cleanup;
690 default:
691 PyErr_SetString(PyExc_IOError,
692 "file is not ready for reading");
693 goto cleanup;
694 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695
Antoine Pitroucdb63fb2010-08-01 20:16:12 +0000696 /* refuse to mix with f.next() */
697 if (check_iterbuffered(self))
698 goto cleanup;
699
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000700 if ((list = PyList_New(0)) == NULL)
701 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000702
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000703 for (;;) {
704 Py_BEGIN_ALLOW_THREADS
705 nread = Util_UnivNewlineRead(&bzerror, self->fp,
706 buffer+nfilled,
707 buffersize-nfilled, self);
708 self->pos += nread;
709 Py_END_ALLOW_THREADS
710 if (bzerror == BZ_STREAM_END) {
711 self->size = self->pos;
712 self->mode = MODE_READ_EOF;
713 if (nread == 0) {
714 sizehint = 0;
715 break;
716 }
717 shortread = 1;
718 } else if (bzerror != BZ_OK) {
719 Util_CatchBZ2Error(bzerror);
720 error:
721 Py_DECREF(list);
722 list = NULL;
723 goto cleanup;
724 }
725 totalread += nread;
726 p = memchr(buffer+nfilled, '\n', nread);
727 if (!shortread && p == NULL) {
728 /* Need a larger buffer to fit this line */
729 nfilled += nread;
730 buffersize *= 2;
731 if (buffersize > INT_MAX) {
732 PyErr_SetString(PyExc_OverflowError,
733 "line is longer than a Python string can hold");
734 goto error;
735 }
736 if (big_buffer == NULL) {
737 /* Create the big buffer */
738 big_buffer = PyString_FromStringAndSize(
739 NULL, buffersize);
740 if (big_buffer == NULL)
741 goto error;
742 buffer = PyString_AS_STRING(big_buffer);
743 memcpy(buffer, small_buffer, nfilled);
744 }
745 else {
746 /* Grow the big buffer */
747 _PyString_Resize(&big_buffer, buffersize);
748 buffer = PyString_AS_STRING(big_buffer);
749 }
750 continue;
751 }
752 end = buffer+nfilled+nread;
753 q = buffer;
754 while (p != NULL) {
755 /* Process complete lines */
756 p++;
757 line = PyString_FromStringAndSize(q, p-q);
758 if (line == NULL)
759 goto error;
760 err = PyList_Append(list, line);
761 Py_DECREF(line);
762 if (err != 0)
763 goto error;
764 q = p;
765 p = memchr(q, '\n', end-q);
766 }
767 /* Move the remaining incomplete line to the start */
768 nfilled = end-q;
769 memmove(buffer, q, nfilled);
770 if (sizehint > 0)
771 if (totalread >= (size_t)sizehint)
772 break;
773 if (shortread) {
774 sizehint = 0;
775 break;
776 }
777 }
778 if (nfilled != 0) {
779 /* Partial last line */
780 line = PyString_FromStringAndSize(buffer, nfilled);
781 if (line == NULL)
782 goto error;
783 if (sizehint > 0) {
784 /* Need to complete the last line */
785 PyObject *rest = Util_GetLine(self, 0);
786 if (rest == NULL) {
787 Py_DECREF(line);
788 goto error;
789 }
790 PyString_Concat(&line, rest);
791 Py_DECREF(rest);
792 if (line == NULL)
793 goto error;
794 }
795 err = PyList_Append(list, line);
796 Py_DECREF(line);
797 if (err != 0)
798 goto error;
799 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000800
801 cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000802 RELEASE_LOCK(self);
803 if (big_buffer) {
804 Py_DECREF(big_buffer);
805 }
806 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807}
808
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000809PyDoc_STRVAR(BZ2File_xreadlines__doc__,
810"xreadlines() -> self\n\
811\n\
812For backward compatibility. BZ2File objects now include the performance\n\
813optimizations previously implemented in the xreadlines module.\n\
814");
815
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816PyDoc_STRVAR(BZ2File_write__doc__,
817"write(data) -> None\n\
818\n\
819Write the 'data' string to file. Note that due to buffering, close() may\n\
820be needed before the file on disk reflects the data written.\n\
821");
822
823/* This is a hacked version of Python's fileobject.c:file_write(). */
824static PyObject *
825BZ2File_write(BZ2FileObject *self, PyObject *args)
826{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000827 PyObject *ret = NULL;
828 Py_buffer pbuf;
829 char *buf;
830 int len;
831 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000832
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000833 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
834 return NULL;
835 buf = pbuf.buf;
836 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000837
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000838 ACQUIRE_LOCK(self);
839 switch (self->mode) {
840 case MODE_WRITE:
841 break;
Tim Peterse3228092002-11-09 04:21:44 +0000842
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000843 case MODE_CLOSED:
844 PyErr_SetString(PyExc_ValueError,
845 "I/O operation on closed file");
846 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000847
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000848 default:
849 PyErr_SetString(PyExc_IOError,
850 "file is not ready for writing");
851 goto cleanup;
852 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000853
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000854 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000855
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000856 Py_BEGIN_ALLOW_THREADS
857 BZ2_bzWrite (&bzerror, self->fp, buf, len);
858 self->pos += len;
859 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000860
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000861 if (bzerror != BZ_OK) {
862 Util_CatchBZ2Error(bzerror);
863 goto cleanup;
864 }
Tim Peterse3228092002-11-09 04:21:44 +0000865
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000866 Py_INCREF(Py_None);
867 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000868
869cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000870 PyBuffer_Release(&pbuf);
871 RELEASE_LOCK(self);
872 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000873}
874
875PyDoc_STRVAR(BZ2File_writelines__doc__,
876"writelines(sequence_of_strings) -> None\n\
877\n\
878Write the sequence of strings to the file. Note that newlines are not\n\
879added. The sequence can be any iterable object producing strings. This is\n\
880equivalent to calling write() for each string.\n\
881");
882
883/* This is a hacked version of Python's fileobject.c:file_writelines(). */
884static PyObject *
885BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
886{
887#define CHUNKSIZE 1000
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000888 PyObject *list = NULL;
889 PyObject *iter = NULL;
890 PyObject *ret = NULL;
891 PyObject *line;
892 int i, j, index, len, islist;
893 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000894
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000895 ACQUIRE_LOCK(self);
896 switch (self->mode) {
897 case MODE_WRITE:
898 break;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000899
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000900 case MODE_CLOSED:
901 PyErr_SetString(PyExc_ValueError,
902 "I/O operation on closed file");
903 goto error;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000904
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000905 default:
906 PyErr_SetString(PyExc_IOError,
907 "file is not ready for writing");
908 goto error;
909 }
Georg Brandl3335a7a2006-08-14 21:42:55 +0000910
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000911 islist = PyList_Check(seq);
912 if (!islist) {
913 iter = PyObject_GetIter(seq);
914 if (iter == NULL) {
915 PyErr_SetString(PyExc_TypeError,
916 "writelines() requires an iterable argument");
917 goto error;
918 }
919 list = PyList_New(CHUNKSIZE);
920 if (list == NULL)
921 goto error;
922 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000923
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000924 /* Strategy: slurp CHUNKSIZE lines into a private list,
925 checking that they are all strings, then write that list
926 without holding the interpreter lock, then come back for more. */
927 for (index = 0; ; index += CHUNKSIZE) {
928 if (islist) {
929 Py_XDECREF(list);
930 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
931 if (list == NULL)
932 goto error;
933 j = PyList_GET_SIZE(list);
934 }
935 else {
936 for (j = 0; j < CHUNKSIZE; j++) {
937 line = PyIter_Next(iter);
938 if (line == NULL) {
939 if (PyErr_Occurred())
940 goto error;
941 break;
942 }
943 PyList_SetItem(list, j, line);
944 }
945 }
946 if (j == 0)
947 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000948
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000949 /* Check that all entries are indeed strings. If not,
950 apply the same rules as for file.write() and
951 convert the rets to strings. This is slow, but
952 seems to be the only way since all conversion APIs
953 could potentially execute Python code. */
954 for (i = 0; i < j; i++) {
955 PyObject *v = PyList_GET_ITEM(list, i);
956 if (!PyString_Check(v)) {
957 const char *buffer;
958 Py_ssize_t len;
959 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
960 PyErr_SetString(PyExc_TypeError,
961 "writelines() "
962 "argument must be "
963 "a sequence of "
964 "strings");
965 goto error;
966 }
967 line = PyString_FromStringAndSize(buffer,
968 len);
969 if (line == NULL)
970 goto error;
971 Py_DECREF(v);
972 PyList_SET_ITEM(list, i, line);
973 }
974 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000975
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000976 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000977
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000978 /* Since we are releasing the global lock, the
979 following code may *not* execute Python code. */
980 Py_BEGIN_ALLOW_THREADS
981 for (i = 0; i < j; i++) {
982 line = PyList_GET_ITEM(list, i);
983 len = PyString_GET_SIZE(line);
984 BZ2_bzWrite (&bzerror, self->fp,
985 PyString_AS_STRING(line), len);
986 if (bzerror != BZ_OK) {
987 Py_BLOCK_THREADS
988 Util_CatchBZ2Error(bzerror);
989 goto error;
990 }
991 }
992 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000993
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000994 if (j < CHUNKSIZE)
995 break;
996 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000997
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000998 Py_INCREF(Py_None);
999 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000
1001 error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001002 RELEASE_LOCK(self);
1003 Py_XDECREF(list);
1004 Py_XDECREF(iter);
1005 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001006#undef CHUNKSIZE
1007}
1008
1009PyDoc_STRVAR(BZ2File_seek__doc__,
1010"seek(offset [, whence]) -> None\n\
1011\n\
1012Move to new file position. Argument offset is a byte count. Optional\n\
1013argument whence defaults to 0 (offset from start of file, offset\n\
1014should be >= 0); other values are 1 (move relative to current position,\n\
1015positive or negative), and 2 (move relative to end of file, usually\n\
1016negative, although many platforms allow seeking beyond the end of a file).\n\
1017\n\
1018Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1019the operation may be extremely slow.\n\
1020");
1021
1022static PyObject *
1023BZ2File_seek(BZ2FileObject *self, PyObject *args)
1024{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001025 int where = 0;
1026 PyObject *offobj;
1027 Py_off_t offset;
1028 char small_buffer[SMALLCHUNK];
1029 char *buffer = small_buffer;
1030 size_t buffersize = SMALLCHUNK;
1031 Py_off_t bytesread = 0;
1032 size_t readsize;
1033 int chunksize;
1034 int bzerror;
1035 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001036
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001037 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1038 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001039#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001040 offset = PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001041#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001042 offset = PyLong_Check(offobj) ?
1043 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001044#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001045 if (PyErr_Occurred())
1046 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001047
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001048 ACQUIRE_LOCK(self);
1049 Util_DropReadAhead(self);
1050 switch (self->mode) {
1051 case MODE_READ:
1052 case MODE_READ_EOF:
1053 break;
Tim Peterse3228092002-11-09 04:21:44 +00001054
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001055 case MODE_CLOSED:
1056 PyErr_SetString(PyExc_ValueError,
1057 "I/O operation on closed file");
1058 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001059
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001060 default:
1061 PyErr_SetString(PyExc_IOError,
1062 "seek works only while reading");
1063 goto cleanup;
1064 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001065
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001066 if (where == 2) {
1067 if (self->size == -1) {
1068 assert(self->mode != MODE_READ_EOF);
1069 for (;;) {
1070 Py_BEGIN_ALLOW_THREADS
1071 chunksize = Util_UnivNewlineRead(
1072 &bzerror, self->fp,
1073 buffer, buffersize,
1074 self);
1075 self->pos += chunksize;
1076 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001077
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001078 bytesread += chunksize;
1079 if (bzerror == BZ_STREAM_END) {
1080 break;
1081 } else if (bzerror != BZ_OK) {
1082 Util_CatchBZ2Error(bzerror);
1083 goto cleanup;
1084 }
1085 }
1086 self->mode = MODE_READ_EOF;
1087 self->size = self->pos;
1088 bytesread = 0;
1089 }
1090 offset = self->size + offset;
1091 } else if (where == 1) {
1092 offset = self->pos + offset;
1093 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001094
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001095 /* Before getting here, offset must be the absolute position the file
1096 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +00001097
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001098 if (offset >= self->pos) {
1099 /* we can move forward */
1100 offset -= self->pos;
1101 } else {
1102 /* we cannot move back, so rewind the stream */
1103 BZ2_bzReadClose(&bzerror, self->fp);
1104 if (self->fp) {
1105 PyFile_DecUseCount((PyFileObject *)self->file);
1106 self->fp = NULL;
1107 }
1108 if (bzerror != BZ_OK) {
1109 Util_CatchBZ2Error(bzerror);
1110 goto cleanup;
1111 }
1112 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1113 if (!ret)
1114 goto cleanup;
1115 Py_DECREF(ret);
1116 ret = NULL;
1117 self->pos = 0;
1118 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1119 0, 0, NULL, 0);
1120 if (self->fp)
1121 PyFile_IncUseCount((PyFileObject *)self->file);
1122 if (bzerror != BZ_OK) {
1123 Util_CatchBZ2Error(bzerror);
1124 goto cleanup;
1125 }
1126 self->mode = MODE_READ;
1127 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001128
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001129 if (offset <= 0 || self->mode == MODE_READ_EOF)
1130 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001131
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001132 /* Before getting here, offset must be set to the number of bytes
1133 * to walk forward. */
1134 for (;;) {
1135 if (offset-bytesread > buffersize)
1136 readsize = buffersize;
1137 else
1138 /* offset might be wider that readsize, but the result
1139 * of the subtraction is bound by buffersize (see the
1140 * condition above). buffersize is 8192. */
1141 readsize = (size_t)(offset-bytesread);
1142 Py_BEGIN_ALLOW_THREADS
1143 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1144 buffer, readsize, self);
1145 self->pos += chunksize;
1146 Py_END_ALLOW_THREADS
1147 bytesread += chunksize;
1148 if (bzerror == BZ_STREAM_END) {
1149 self->size = self->pos;
1150 self->mode = MODE_READ_EOF;
1151 break;
1152 } else if (bzerror != BZ_OK) {
1153 Util_CatchBZ2Error(bzerror);
1154 goto cleanup;
1155 }
1156 if (bytesread == offset)
1157 break;
1158 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001159
1160exit:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001161 Py_INCREF(Py_None);
1162 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163
1164cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001165 RELEASE_LOCK(self);
1166 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001167}
1168
1169PyDoc_STRVAR(BZ2File_tell__doc__,
1170"tell() -> int\n\
1171\n\
1172Return the current file position, an integer (may be a long integer).\n\
1173");
1174
1175static PyObject *
1176BZ2File_tell(BZ2FileObject *self, PyObject *args)
1177{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001178 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001179
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001180 if (self->mode == MODE_CLOSED) {
1181 PyErr_SetString(PyExc_ValueError,
1182 "I/O operation on closed file");
1183 goto cleanup;
1184 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001185
Georg Brandla8bcecc2005-09-03 07:49:53 +00001186#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001187 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001188#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001189 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001190#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001191
1192cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001193 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001194}
1195
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001196PyDoc_STRVAR(BZ2File_close__doc__,
1197"close() -> None or (perhaps) an integer\n\
1198\n\
1199Close the file. Sets data attribute .closed to true. A closed file\n\
1200cannot be used for further I/O operations. close() may be called more\n\
1201than once without error.\n\
1202");
1203
1204static PyObject *
1205BZ2File_close(BZ2FileObject *self)
1206{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001207 PyObject *ret = NULL;
1208 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001209
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001210 ACQUIRE_LOCK(self);
1211 switch (self->mode) {
1212 case MODE_READ:
1213 case MODE_READ_EOF:
1214 BZ2_bzReadClose(&bzerror, self->fp);
1215 break;
1216 case MODE_WRITE:
1217 BZ2_bzWriteClose(&bzerror, self->fp,
1218 0, NULL, NULL);
1219 break;
1220 }
1221 if (self->fp) {
1222 PyFile_DecUseCount((PyFileObject *)self->file);
1223 self->fp = NULL;
1224 }
1225 self->mode = MODE_CLOSED;
1226 ret = PyObject_CallMethod(self->file, "close", NULL);
1227 if (bzerror != BZ_OK) {
1228 Util_CatchBZ2Error(bzerror);
1229 Py_XDECREF(ret);
1230 ret = NULL;
1231 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001232
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001233 RELEASE_LOCK(self);
1234 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001235}
1236
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001237static PyObject *BZ2File_getiter(BZ2FileObject *self);
1238
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001239static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001240 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1241 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1242 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1243 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1244 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1245 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1246 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1247 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1248 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1249 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001250};
1251
1252
1253/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001254/* Getters and setters of BZ2File. */
1255
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001256/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1257static PyObject *
1258BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1259{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001260 switch (self->f_newlinetypes) {
1261 case NEWLINE_UNKNOWN:
1262 Py_INCREF(Py_None);
1263 return Py_None;
1264 case NEWLINE_CR:
1265 return PyString_FromString("\r");
1266 case NEWLINE_LF:
1267 return PyString_FromString("\n");
1268 case NEWLINE_CR|NEWLINE_LF:
1269 return Py_BuildValue("(ss)", "\r", "\n");
1270 case NEWLINE_CRLF:
1271 return PyString_FromString("\r\n");
1272 case NEWLINE_CR|NEWLINE_CRLF:
1273 return Py_BuildValue("(ss)", "\r", "\r\n");
1274 case NEWLINE_LF|NEWLINE_CRLF:
1275 return Py_BuildValue("(ss)", "\n", "\r\n");
1276 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1277 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1278 default:
1279 PyErr_Format(PyExc_SystemError,
1280 "Unknown newlines value 0x%x\n",
1281 self->f_newlinetypes);
1282 return NULL;
1283 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001284}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001285
1286static PyObject *
1287BZ2File_get_closed(BZ2FileObject *self, void *closure)
1288{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001289 return PyInt_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001290}
1291
1292static PyObject *
1293BZ2File_get_mode(BZ2FileObject *self, void *closure)
1294{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001295 return PyObject_GetAttrString(self->file, "mode");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001296}
1297
1298static PyObject *
1299BZ2File_get_name(BZ2FileObject *self, void *closure)
1300{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001301 return PyObject_GetAttrString(self->file, "name");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001302}
1303
1304static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001305 {"closed", (getter)BZ2File_get_closed, NULL,
1306 "True if the file is closed"},
1307 {"newlines", (getter)BZ2File_get_newlines, NULL,
1308 "end-of-line convention used in this file"},
1309 {"mode", (getter)BZ2File_get_mode, NULL,
1310 "file mode ('r', 'w', or 'U')"},
1311 {"name", (getter)BZ2File_get_name, NULL,
1312 "file name"},
1313 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001314};
1315
1316
1317/* ===================================================================== */
1318/* Members of BZ2File_Type. */
1319
1320#undef OFF
1321#define OFF(x) offsetof(BZ2FileObject, x)
1322
1323static PyMemberDef BZ2File_members[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001324 {"softspace", T_INT, OFF(f_softspace), 0,
1325 "flag indicating that a space needs to be printed; used by print"},
1326 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001327};
1328
1329/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001330/* Slot definitions for BZ2File_Type. */
1331
1332static int
1333BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1334{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001335 static char *kwlist[] = {"filename", "mode", "buffering",
1336 "compresslevel", 0};
1337 PyObject *name;
1338 char *mode = "r";
1339 int buffering = -1;
1340 int compresslevel = 9;
1341 int bzerror;
1342 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001343
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001344 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001345
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001346 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1347 kwlist, &name, &mode, &buffering,
1348 &compresslevel))
1349 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001350
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001351 if (compresslevel < 1 || compresslevel > 9) {
1352 PyErr_SetString(PyExc_ValueError,
1353 "compresslevel must be between 1 and 9");
1354 return -1;
1355 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001356
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001357 for (;;) {
1358 int error = 0;
1359 switch (*mode) {
1360 case 'r':
1361 case 'w':
1362 if (mode_char)
1363 error = 1;
1364 mode_char = *mode;
1365 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001366
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001367 case 'b':
1368 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001369
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001370 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001371#ifdef __VMS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001372 self->f_univ_newline = 0;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001373#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001374 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001375#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001376 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001377
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001378 default:
1379 error = 1;
1380 break;
1381 }
1382 if (error) {
1383 PyErr_Format(PyExc_ValueError,
1384 "invalid mode char %c", *mode);
1385 return -1;
1386 }
1387 mode++;
1388 if (*mode == '\0')
1389 break;
1390 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001391
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001392 if (mode_char == 0) {
1393 mode_char = 'r';
1394 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001395
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001396 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001397
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001398 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1399 name, mode, buffering);
1400 if (self->file == NULL)
1401 return -1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001402
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001403 /* From now on, we have stuff to dealloc, so jump to error label
1404 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001405
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001406#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001407 self->lock = PyThread_allocate_lock();
1408 if (!self->lock) {
1409 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1410 goto error;
1411 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001412#endif
1413
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001414 if (mode_char == 'r')
1415 self->fp = BZ2_bzReadOpen(&bzerror,
1416 PyFile_AsFile(self->file),
1417 0, 0, NULL, 0);
1418 else
1419 self->fp = BZ2_bzWriteOpen(&bzerror,
1420 PyFile_AsFile(self->file),
1421 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001422
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001423 if (bzerror != BZ_OK) {
1424 Util_CatchBZ2Error(bzerror);
1425 goto error;
1426 }
1427 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001428
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001429 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001430
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001431 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001432
1433error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001434 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001435#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001436 if (self->lock) {
1437 PyThread_free_lock(self->lock);
1438 self->lock = NULL;
1439 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001440#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001441 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001442}
1443
1444static void
1445BZ2File_dealloc(BZ2FileObject *self)
1446{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001447 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001448#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001449 if (self->lock)
1450 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001451#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001452 switch (self->mode) {
1453 case MODE_READ:
1454 case MODE_READ_EOF:
1455 BZ2_bzReadClose(&bzerror, self->fp);
1456 break;
1457 case MODE_WRITE:
1458 BZ2_bzWriteClose(&bzerror, self->fp,
1459 0, NULL, NULL);
1460 break;
1461 }
1462 if (self->fp) {
1463 PyFile_DecUseCount((PyFileObject *)self->file);
1464 self->fp = NULL;
1465 }
1466 Util_DropReadAhead(self);
1467 Py_XDECREF(self->file);
1468 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001469}
1470
1471/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1472static PyObject *
1473BZ2File_getiter(BZ2FileObject *self)
1474{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001475 if (self->mode == MODE_CLOSED) {
1476 PyErr_SetString(PyExc_ValueError,
1477 "I/O operation on closed file");
1478 return NULL;
1479 }
1480 Py_INCREF((PyObject*)self);
1481 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001482}
1483
1484/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1485#define READAHEAD_BUFSIZE 8192
1486static PyObject *
1487BZ2File_iternext(BZ2FileObject *self)
1488{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001489 PyStringObject* ret;
1490 ACQUIRE_LOCK(self);
1491 if (self->mode == MODE_CLOSED) {
1492 RELEASE_LOCK(self);
1493 PyErr_SetString(PyExc_ValueError,
1494 "I/O operation on closed file");
1495 return NULL;
1496 }
1497 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1498 RELEASE_LOCK(self);
1499 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1500 Py_XDECREF(ret);
1501 return NULL;
1502 }
1503 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001504}
1505
1506/* ===================================================================== */
1507/* BZ2File_Type definition. */
1508
1509PyDoc_VAR(BZ2File__doc__) =
1510PyDoc_STR(
1511"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1512\n\
1513Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1514writing. When opened for writing, the file will be created if it doesn't\n\
1515exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1516unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1517is given, must be a number between 1 and 9.\n\
1518")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001519PyDoc_STR(
1520"\n\
1521Add a 'U' to mode to open the file for input with universal newline\n\
1522support. Any line ending in the input file will be seen as a '\\n' in\n\
1523Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1524for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1525'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1526newlines are available only when reading.\n\
1527")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001528;
1529
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001530static PyTypeObject BZ2File_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001531 PyVarObject_HEAD_INIT(NULL, 0)
1532 "bz2.BZ2File", /*tp_name*/
1533 sizeof(BZ2FileObject), /*tp_basicsize*/
1534 0, /*tp_itemsize*/
1535 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1536 0, /*tp_print*/
1537 0, /*tp_getattr*/
1538 0, /*tp_setattr*/
1539 0, /*tp_compare*/
1540 0, /*tp_repr*/
1541 0, /*tp_as_number*/
1542 0, /*tp_as_sequence*/
1543 0, /*tp_as_mapping*/
1544 0, /*tp_hash*/
1545 0, /*tp_call*/
1546 0, /*tp_str*/
1547 PyObject_GenericGetAttr,/*tp_getattro*/
1548 PyObject_GenericSetAttr,/*tp_setattro*/
1549 0, /*tp_as_buffer*/
1550 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1551 BZ2File__doc__, /*tp_doc*/
1552 0, /*tp_traverse*/
1553 0, /*tp_clear*/
1554 0, /*tp_richcompare*/
1555 0, /*tp_weaklistoffset*/
1556 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1557 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1558 BZ2File_methods, /*tp_methods*/
1559 BZ2File_members, /*tp_members*/
1560 BZ2File_getset, /*tp_getset*/
1561 0, /*tp_base*/
1562 0, /*tp_dict*/
1563 0, /*tp_descr_get*/
1564 0, /*tp_descr_set*/
1565 0, /*tp_dictoffset*/
1566 (initproc)BZ2File_init, /*tp_init*/
1567 PyType_GenericAlloc, /*tp_alloc*/
1568 PyType_GenericNew, /*tp_new*/
1569 _PyObject_Del, /*tp_free*/
1570 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001571};
1572
1573
1574/* ===================================================================== */
1575/* Methods of BZ2Comp. */
1576
1577PyDoc_STRVAR(BZ2Comp_compress__doc__,
1578"compress(data) -> string\n\
1579\n\
1580Provide more data to the compressor object. It will return chunks of\n\
1581compressed data whenever possible. When you've finished providing data\n\
1582to compress, call the flush() method to finish the compression process,\n\
1583and return what is left in the internal buffers.\n\
1584");
1585
1586static PyObject *
1587BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1588{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001589 Py_buffer pdata;
1590 char *data;
1591 int datasize;
1592 int bufsize = SMALLCHUNK;
1593 PY_LONG_LONG totalout;
1594 PyObject *ret = NULL;
1595 bz_stream *bzs = &self->bzs;
1596 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001597
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001598 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1599 return NULL;
1600 data = pdata.buf;
1601 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001602
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001603 if (datasize == 0) {
1604 PyBuffer_Release(&pdata);
1605 return PyString_FromString("");
1606 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001607
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001608 ACQUIRE_LOCK(self);
1609 if (!self->running) {
1610 PyErr_SetString(PyExc_ValueError,
1611 "this object was already flushed");
1612 goto error;
1613 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001614
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001615 ret = PyString_FromStringAndSize(NULL, bufsize);
1616 if (!ret)
1617 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001618
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001619 bzs->next_in = data;
1620 bzs->avail_in = datasize;
1621 bzs->next_out = BUF(ret);
1622 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001623
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001624 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001625
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001626 for (;;) {
1627 Py_BEGIN_ALLOW_THREADS
1628 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1629 Py_END_ALLOW_THREADS
1630 if (bzerror != BZ_RUN_OK) {
1631 Util_CatchBZ2Error(bzerror);
1632 goto error;
1633 }
1634 if (bzs->avail_in == 0)
1635 break; /* no more input data */
1636 if (bzs->avail_out == 0) {
1637 bufsize = Util_NewBufferSize(bufsize);
1638 if (_PyString_Resize(&ret, bufsize) < 0) {
1639 BZ2_bzCompressEnd(bzs);
1640 goto error;
1641 }
1642 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1643 - totalout);
1644 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1645 }
1646 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001647
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001648 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001649
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001650 RELEASE_LOCK(self);
1651 PyBuffer_Release(&pdata);
1652 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001653
1654error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001655 RELEASE_LOCK(self);
1656 PyBuffer_Release(&pdata);
1657 Py_XDECREF(ret);
1658 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001659}
1660
1661PyDoc_STRVAR(BZ2Comp_flush__doc__,
1662"flush() -> string\n\
1663\n\
1664Finish the compression process and return what is left in internal buffers.\n\
1665You must not use the compressor object after calling this method.\n\
1666");
1667
1668static PyObject *
1669BZ2Comp_flush(BZ2CompObject *self)
1670{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001671 int bufsize = SMALLCHUNK;
1672 PyObject *ret = NULL;
1673 bz_stream *bzs = &self->bzs;
1674 PY_LONG_LONG totalout;
1675 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001676
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001677 ACQUIRE_LOCK(self);
1678 if (!self->running) {
1679 PyErr_SetString(PyExc_ValueError, "object was already "
1680 "flushed");
1681 goto error;
1682 }
1683 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001684
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001685 ret = PyString_FromStringAndSize(NULL, bufsize);
1686 if (!ret)
1687 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001688
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001689 bzs->next_out = BUF(ret);
1690 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001691
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001692 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001693
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001694 for (;;) {
1695 Py_BEGIN_ALLOW_THREADS
1696 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1697 Py_END_ALLOW_THREADS
1698 if (bzerror == BZ_STREAM_END) {
1699 break;
1700 } else if (bzerror != BZ_FINISH_OK) {
1701 Util_CatchBZ2Error(bzerror);
1702 goto error;
1703 }
1704 if (bzs->avail_out == 0) {
1705 bufsize = Util_NewBufferSize(bufsize);
1706 if (_PyString_Resize(&ret, bufsize) < 0)
1707 goto error;
1708 bzs->next_out = BUF(ret);
1709 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1710 - totalout);
1711 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1712 }
1713 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001714
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001715 if (bzs->avail_out != 0)
1716 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001717
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001718 RELEASE_LOCK(self);
1719 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001720
1721error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001722 RELEASE_LOCK(self);
1723 Py_XDECREF(ret);
1724 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001725}
1726
1727static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001728 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1729 BZ2Comp_compress__doc__},
1730 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1731 BZ2Comp_flush__doc__},
1732 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001733};
1734
1735
1736/* ===================================================================== */
1737/* Slot definitions for BZ2Comp_Type. */
1738
1739static int
1740BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1741{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001742 int compresslevel = 9;
1743 int bzerror;
1744 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001745
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001746 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1747 kwlist, &compresslevel))
1748 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001749
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001750 if (compresslevel < 1 || compresslevel > 9) {
1751 PyErr_SetString(PyExc_ValueError,
1752 "compresslevel must be between 1 and 9");
1753 goto error;
1754 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001755
1756#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001757 self->lock = PyThread_allocate_lock();
1758 if (!self->lock) {
1759 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1760 goto error;
1761 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001762#endif
1763
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001764 memset(&self->bzs, 0, sizeof(bz_stream));
1765 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1766 if (bzerror != BZ_OK) {
1767 Util_CatchBZ2Error(bzerror);
1768 goto error;
1769 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001770
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001771 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001772
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001773 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001774error:
1775#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001776 if (self->lock) {
1777 PyThread_free_lock(self->lock);
1778 self->lock = NULL;
1779 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001780#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001781 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001782}
1783
1784static void
1785BZ2Comp_dealloc(BZ2CompObject *self)
1786{
1787#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001788 if (self->lock)
1789 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001790#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001791 BZ2_bzCompressEnd(&self->bzs);
1792 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001793}
1794
1795
1796/* ===================================================================== */
1797/* BZ2Comp_Type definition. */
1798
1799PyDoc_STRVAR(BZ2Comp__doc__,
1800"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1801\n\
1802Create a new compressor object. This object may be used to compress\n\
1803data sequentially. If you want to compress data in one shot, use the\n\
1804compress() function instead. The compresslevel parameter, if given,\n\
1805must be a number between 1 and 9.\n\
1806");
1807
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001808static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001809 PyVarObject_HEAD_INIT(NULL, 0)
1810 "bz2.BZ2Compressor", /*tp_name*/
1811 sizeof(BZ2CompObject), /*tp_basicsize*/
1812 0, /*tp_itemsize*/
1813 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1814 0, /*tp_print*/
1815 0, /*tp_getattr*/
1816 0, /*tp_setattr*/
1817 0, /*tp_compare*/
1818 0, /*tp_repr*/
1819 0, /*tp_as_number*/
1820 0, /*tp_as_sequence*/
1821 0, /*tp_as_mapping*/
1822 0, /*tp_hash*/
1823 0, /*tp_call*/
1824 0, /*tp_str*/
1825 PyObject_GenericGetAttr,/*tp_getattro*/
1826 PyObject_GenericSetAttr,/*tp_setattro*/
1827 0, /*tp_as_buffer*/
1828 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1829 BZ2Comp__doc__, /*tp_doc*/
1830 0, /*tp_traverse*/
1831 0, /*tp_clear*/
1832 0, /*tp_richcompare*/
1833 0, /*tp_weaklistoffset*/
1834 0, /*tp_iter*/
1835 0, /*tp_iternext*/
1836 BZ2Comp_methods, /*tp_methods*/
1837 0, /*tp_members*/
1838 0, /*tp_getset*/
1839 0, /*tp_base*/
1840 0, /*tp_dict*/
1841 0, /*tp_descr_get*/
1842 0, /*tp_descr_set*/
1843 0, /*tp_dictoffset*/
1844 (initproc)BZ2Comp_init, /*tp_init*/
1845 PyType_GenericAlloc, /*tp_alloc*/
1846 PyType_GenericNew, /*tp_new*/
1847 _PyObject_Del, /*tp_free*/
1848 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001849};
1850
1851
1852/* ===================================================================== */
1853/* Members of BZ2Decomp. */
1854
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001855#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001856#define OFF(x) offsetof(BZ2DecompObject, x)
1857
1858static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001859 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1860 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001861};
1862
1863
1864/* ===================================================================== */
1865/* Methods of BZ2Decomp. */
1866
1867PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1868"decompress(data) -> string\n\
1869\n\
1870Provide more data to the decompressor object. It will return chunks\n\
1871of decompressed data whenever possible. If you try to decompress data\n\
1872after the end of stream is found, EOFError will be raised. If any data\n\
1873was found after the end of stream, it'll be ignored and saved in\n\
1874unused_data attribute.\n\
1875");
1876
1877static PyObject *
1878BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1879{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001880 Py_buffer pdata;
1881 char *data;
1882 int datasize;
1883 int bufsize = SMALLCHUNK;
1884 PY_LONG_LONG totalout;
1885 PyObject *ret = NULL;
1886 bz_stream *bzs = &self->bzs;
1887 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001888
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001889 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1890 return NULL;
1891 data = pdata.buf;
1892 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001893
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001894 ACQUIRE_LOCK(self);
1895 if (!self->running) {
1896 PyErr_SetString(PyExc_EOFError, "end of stream was "
1897 "already found");
1898 goto error;
1899 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001900
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001901 ret = PyString_FromStringAndSize(NULL, bufsize);
1902 if (!ret)
1903 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001904
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001905 bzs->next_in = data;
1906 bzs->avail_in = datasize;
1907 bzs->next_out = BUF(ret);
1908 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001909
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001910 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001911
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001912 for (;;) {
1913 Py_BEGIN_ALLOW_THREADS
1914 bzerror = BZ2_bzDecompress(bzs);
1915 Py_END_ALLOW_THREADS
1916 if (bzerror == BZ_STREAM_END) {
1917 if (bzs->avail_in != 0) {
1918 Py_DECREF(self->unused_data);
1919 self->unused_data =
1920 PyString_FromStringAndSize(bzs->next_in,
1921 bzs->avail_in);
1922 }
1923 self->running = 0;
1924 break;
1925 }
1926 if (bzerror != BZ_OK) {
1927 Util_CatchBZ2Error(bzerror);
1928 goto error;
1929 }
1930 if (bzs->avail_in == 0)
1931 break; /* no more input data */
1932 if (bzs->avail_out == 0) {
1933 bufsize = Util_NewBufferSize(bufsize);
1934 if (_PyString_Resize(&ret, bufsize) < 0) {
1935 BZ2_bzDecompressEnd(bzs);
1936 goto error;
1937 }
1938 bzs->next_out = BUF(ret);
1939 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1940 - totalout);
1941 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1942 }
1943 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001944
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001945 if (bzs->avail_out != 0)
1946 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001947
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001948 RELEASE_LOCK(self);
1949 PyBuffer_Release(&pdata);
1950 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001951
1952error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001953 RELEASE_LOCK(self);
1954 PyBuffer_Release(&pdata);
1955 Py_XDECREF(ret);
1956 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001957}
1958
1959static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001960 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1961 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001962};
1963
1964
1965/* ===================================================================== */
1966/* Slot definitions for BZ2Decomp_Type. */
1967
1968static int
1969BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1970{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001971 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001972
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001973 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1974 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001975
1976#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001977 self->lock = PyThread_allocate_lock();
1978 if (!self->lock) {
1979 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1980 goto error;
1981 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001982#endif
1983
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001984 self->unused_data = PyString_FromString("");
1985 if (!self->unused_data)
1986 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001987
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001988 memset(&self->bzs, 0, sizeof(bz_stream));
1989 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1990 if (bzerror != BZ_OK) {
1991 Util_CatchBZ2Error(bzerror);
1992 goto error;
1993 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001994
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001995 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001996
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001997 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001998
1999error:
2000#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002001 if (self->lock) {
2002 PyThread_free_lock(self->lock);
2003 self->lock = NULL;
2004 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002005#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002006 Py_CLEAR(self->unused_data);
2007 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002008}
2009
2010static void
2011BZ2Decomp_dealloc(BZ2DecompObject *self)
2012{
2013#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002014 if (self->lock)
2015 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002016#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002017 Py_XDECREF(self->unused_data);
2018 BZ2_bzDecompressEnd(&self->bzs);
2019 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002020}
2021
2022
2023/* ===================================================================== */
2024/* BZ2Decomp_Type definition. */
2025
2026PyDoc_STRVAR(BZ2Decomp__doc__,
2027"BZ2Decompressor() -> decompressor object\n\
2028\n\
2029Create a new decompressor object. This object may be used to decompress\n\
2030data sequentially. If you want to decompress data in one shot, use the\n\
2031decompress() function instead.\n\
2032");
2033
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00002034static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002035 PyVarObject_HEAD_INIT(NULL, 0)
2036 "bz2.BZ2Decompressor", /*tp_name*/
2037 sizeof(BZ2DecompObject), /*tp_basicsize*/
2038 0, /*tp_itemsize*/
2039 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2040 0, /*tp_print*/
2041 0, /*tp_getattr*/
2042 0, /*tp_setattr*/
2043 0, /*tp_compare*/
2044 0, /*tp_repr*/
2045 0, /*tp_as_number*/
2046 0, /*tp_as_sequence*/
2047 0, /*tp_as_mapping*/
2048 0, /*tp_hash*/
2049 0, /*tp_call*/
2050 0, /*tp_str*/
2051 PyObject_GenericGetAttr,/*tp_getattro*/
2052 PyObject_GenericSetAttr,/*tp_setattro*/
2053 0, /*tp_as_buffer*/
2054 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2055 BZ2Decomp__doc__, /*tp_doc*/
2056 0, /*tp_traverse*/
2057 0, /*tp_clear*/
2058 0, /*tp_richcompare*/
2059 0, /*tp_weaklistoffset*/
2060 0, /*tp_iter*/
2061 0, /*tp_iternext*/
2062 BZ2Decomp_methods, /*tp_methods*/
2063 BZ2Decomp_members, /*tp_members*/
2064 0, /*tp_getset*/
2065 0, /*tp_base*/
2066 0, /*tp_dict*/
2067 0, /*tp_descr_get*/
2068 0, /*tp_descr_set*/
2069 0, /*tp_dictoffset*/
2070 (initproc)BZ2Decomp_init, /*tp_init*/
2071 PyType_GenericAlloc, /*tp_alloc*/
2072 PyType_GenericNew, /*tp_new*/
2073 _PyObject_Del, /*tp_free*/
2074 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002075};
2076
2077
2078/* ===================================================================== */
2079/* Module functions. */
2080
2081PyDoc_STRVAR(bz2_compress__doc__,
2082"compress(data [, compresslevel=9]) -> string\n\
2083\n\
2084Compress data in one shot. If you want to compress data sequentially,\n\
2085use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2086given, must be a number between 1 and 9.\n\
2087");
2088
2089static PyObject *
2090bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2091{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002092 int compresslevel=9;
2093 Py_buffer pdata;
2094 char *data;
2095 int datasize;
2096 int bufsize;
2097 PyObject *ret = NULL;
2098 bz_stream _bzs;
2099 bz_stream *bzs = &_bzs;
2100 int bzerror;
2101 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002102
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002103 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2104 kwlist, &pdata,
2105 &compresslevel))
2106 return NULL;
2107 data = pdata.buf;
2108 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002109
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002110 if (compresslevel < 1 || compresslevel > 9) {
2111 PyErr_SetString(PyExc_ValueError,
2112 "compresslevel must be between 1 and 9");
2113 PyBuffer_Release(&pdata);
2114 return NULL;
2115 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002116
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002117 /* Conforming to bz2 manual, this is large enough to fit compressed
2118 * data in one shot. We will check it later anyway. */
2119 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002120
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002121 ret = PyString_FromStringAndSize(NULL, bufsize);
2122 if (!ret) {
2123 PyBuffer_Release(&pdata);
2124 return NULL;
2125 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002126
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002127 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002128
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002129 bzs->next_in = data;
2130 bzs->avail_in = datasize;
2131 bzs->next_out = BUF(ret);
2132 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002133
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002134 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2135 if (bzerror != BZ_OK) {
2136 Util_CatchBZ2Error(bzerror);
2137 PyBuffer_Release(&pdata);
2138 Py_DECREF(ret);
2139 return NULL;
2140 }
Tim Peterse3228092002-11-09 04:21:44 +00002141
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002142 for (;;) {
2143 Py_BEGIN_ALLOW_THREADS
2144 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2145 Py_END_ALLOW_THREADS
2146 if (bzerror == BZ_STREAM_END) {
2147 break;
2148 } else if (bzerror != BZ_FINISH_OK) {
2149 BZ2_bzCompressEnd(bzs);
2150 Util_CatchBZ2Error(bzerror);
2151 PyBuffer_Release(&pdata);
2152 Py_DECREF(ret);
2153 return NULL;
2154 }
2155 if (bzs->avail_out == 0) {
2156 bufsize = Util_NewBufferSize(bufsize);
2157 if (_PyString_Resize(&ret, bufsize) < 0) {
2158 BZ2_bzCompressEnd(bzs);
2159 PyBuffer_Release(&pdata);
2160 Py_DECREF(ret);
2161 return NULL;
2162 }
2163 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2164 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2165 }
2166 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002167
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002168 if (bzs->avail_out != 0)
2169 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2170 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002171
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002172 PyBuffer_Release(&pdata);
2173 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002174}
2175
2176PyDoc_STRVAR(bz2_decompress__doc__,
2177"decompress(data) -> decompressed data\n\
2178\n\
2179Decompress data in one shot. If you want to decompress data sequentially,\n\
2180use an instance of BZ2Decompressor instead.\n\
2181");
2182
2183static PyObject *
2184bz2_decompress(PyObject *self, PyObject *args)
2185{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002186 Py_buffer pdata;
2187 char *data;
2188 int datasize;
2189 int bufsize = SMALLCHUNK;
2190 PyObject *ret;
2191 bz_stream _bzs;
2192 bz_stream *bzs = &_bzs;
2193 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002194
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002195 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2196 return NULL;
2197 data = pdata.buf;
2198 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002199
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002200 if (datasize == 0) {
2201 PyBuffer_Release(&pdata);
2202 return PyString_FromString("");
2203 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002204
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002205 ret = PyString_FromStringAndSize(NULL, bufsize);
2206 if (!ret) {
2207 PyBuffer_Release(&pdata);
2208 return NULL;
2209 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002210
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002211 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002212
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002213 bzs->next_in = data;
2214 bzs->avail_in = datasize;
2215 bzs->next_out = BUF(ret);
2216 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002217
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002218 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2219 if (bzerror != BZ_OK) {
2220 Util_CatchBZ2Error(bzerror);
2221 Py_DECREF(ret);
2222 PyBuffer_Release(&pdata);
2223 return NULL;
2224 }
Tim Peterse3228092002-11-09 04:21:44 +00002225
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002226 for (;;) {
2227 Py_BEGIN_ALLOW_THREADS
2228 bzerror = BZ2_bzDecompress(bzs);
2229 Py_END_ALLOW_THREADS
2230 if (bzerror == BZ_STREAM_END) {
2231 break;
2232 } else if (bzerror != BZ_OK) {
2233 BZ2_bzDecompressEnd(bzs);
2234 Util_CatchBZ2Error(bzerror);
2235 PyBuffer_Release(&pdata);
2236 Py_DECREF(ret);
2237 return NULL;
2238 }
2239 if (bzs->avail_in == 0) {
2240 BZ2_bzDecompressEnd(bzs);
2241 PyErr_SetString(PyExc_ValueError,
2242 "couldn't find end of stream");
2243 PyBuffer_Release(&pdata);
2244 Py_DECREF(ret);
2245 return NULL;
2246 }
2247 if (bzs->avail_out == 0) {
2248 bufsize = Util_NewBufferSize(bufsize);
2249 if (_PyString_Resize(&ret, bufsize) < 0) {
2250 BZ2_bzDecompressEnd(bzs);
2251 PyBuffer_Release(&pdata);
2252 Py_DECREF(ret);
2253 return NULL;
2254 }
2255 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2256 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2257 }
2258 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002259
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002260 if (bzs->avail_out != 0)
2261 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2262 BZ2_bzDecompressEnd(bzs);
2263 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002264
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002265 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002266}
2267
2268static PyMethodDef bz2_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002269 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2270 bz2_compress__doc__},
2271 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2272 bz2_decompress__doc__},
2273 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002274};
2275
2276/* ===================================================================== */
2277/* Initialization function. */
2278
2279PyDoc_STRVAR(bz2__doc__,
2280"The python bz2 module provides a comprehensive interface for\n\
2281the bz2 compression library. It implements a complete file\n\
2282interface, one shot (de)compression functions, and types for\n\
2283sequential (de)compression.\n\
2284");
2285
Neal Norwitz21d896c2003-07-01 20:15:21 +00002286PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002287initbz2(void)
2288{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002289 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002290
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002291 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2292 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2293 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002294
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002295 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2296 if (m == NULL)
2297 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002298
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002299 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002300
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002301 Py_INCREF(&BZ2File_Type);
2302 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002303
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002304 Py_INCREF(&BZ2Comp_Type);
2305 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002306
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002307 Py_INCREF(&BZ2Decomp_Type);
2308 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002309}