| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1 | /* | 
|  | 2 |  | 
|  | 3 | python-bz2 - python bz2 library interface | 
|  | 4 |  | 
|  | 5 | Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com> | 
|  | 6 | Copyright (c) 2002  Python Software Foundation; All Rights Reserved | 
|  | 7 |  | 
|  | 8 | */ | 
|  | 9 |  | 
| Martin v. Löwis | e17af7b | 2002-11-23 09:16:19 +0000 | [diff] [blame] | 10 | #include "Python.h" | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 11 | #include <stdio.h> | 
|  | 12 | #include <bzlib.h> | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 13 | #include "structmember.h" | 
|  | 14 |  | 
|  | 15 | #ifdef WITH_THREAD | 
|  | 16 | #include "pythread.h" | 
|  | 17 | #endif | 
|  | 18 |  | 
/* Credit text naming the author of this module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    "Gustavo Niemeyer <niemeyer@conectiva.com>\n";
|  | 24 |  | 
/* Py_off_t: our own off_t-like type for stream offsets, 64-bit if possible.
 * The selection logic is copied from Objects/fileobject.c:
 *   - without large-file support, or when off_t is already 8+ bytes wide,
 *     plain off_t is used;
 *   - otherwise fpos_t is used if it is 8+ bytes wide;
 *   - otherwise the build is rejected.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
|  | 36 |  | 
/* BUF(v): pointer to the character storage of the string object v.
 * The argument is parenthesized before the cast so that passing an
 * expression (not just a plain identifier) expands correctly. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0     /* read methods raise "I/O operation on closed file" */
#define MODE_READ     1     /* open for reading, more data may follow */
#define MODE_READ_EOF 2     /* reading, BZ_STREAM_END has been reached */
#define MODE_WRITE    3     /* presumably: open for writing (not used in the read paths) */
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 44 |  | 
/* When the included bzlib.h does not define BZ_CONFIG_ERROR, map every
 * BZ2_-prefixed entry point used in this file onto the unprefixed name.
 * NOTE(review): presumably this targets an older libbz2 whose API lacks
 * the BZ2_ prefix -- confirm against the supported library versions. */
#if !defined(BZ_CONFIG_ERROR)

/* File-oriented reading */
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzRead bzRead
#define BZ2_bzReadClose bzReadClose

/* File-oriented writing */
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteClose bzWriteClose

/* In-memory compression */
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressEnd bzCompressEnd

/* In-memory decompression */
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressEnd bzDecompressEnd

#endif /* ! BZ_CONFIG_ERROR */
|  | 61 |  | 
|  | 62 |  | 
/* Per-object locking helpers.
 *
 * ACQUIRE_LOCK(obj) first tries to take obj->lock without blocking; only
 * if that fails does it release the GIL (Py_BEGIN_ALLOW_THREADS) and block
 * on the lock.  RELEASE_LOCK(obj) releases the lock.  Without WITH_THREAD
 * both macros expand to nothing.
 *
 * The macro argument is parenthesized so that an expression argument
 * binds correctly to the -> operator.
 */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
|  | 75 |  | 
/* Smaller of two values.  An argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))

/* Bit flags accumulated in f_newlinetypes */
#define NEWLINE_UNKNOWN 0x0     /* no newline seen so far */
#define NEWLINE_CR      0x1     /* a lone \r was seen */
#define NEWLINE_LF      0x2     /* a \n was seen */
#define NEWLINE_CRLF    0x4     /* a \r\n pair was seen */
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 83 |  | 
|  | 84 | /* ===================================================================== */ | 
|  | 85 | /* Structure definitions. */ | 
|  | 86 |  | 
|  | 87 | typedef struct { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 88 | PyObject_HEAD | 
|  | 89 | PyObject *file; | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 90 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 91 | char* f_buf;                /* Allocated readahead buffer */ | 
|  | 92 | char* f_bufend;             /* Points after last occupied position */ | 
|  | 93 | char* f_bufptr;             /* Current buffer position */ | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 94 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 95 | int f_softspace;            /* Flag used by 'print' command */ | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 96 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 97 | int f_univ_newline;         /* Handle any newline convention */ | 
|  | 98 | int f_newlinetypes;         /* Types of newlines seen */ | 
|  | 99 | int f_skipnextlf;           /* Skip next \n */ | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 100 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 101 | BZFILE *fp; | 
|  | 102 | int mode; | 
|  | 103 | Py_off_t pos; | 
|  | 104 | Py_off_t size; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 105 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 106 | PyThread_type_lock lock; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 107 | #endif | 
|  | 108 | } BZ2FileObject; | 
|  | 109 |  | 
|  | 110 | typedef struct { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 111 | PyObject_HEAD | 
|  | 112 | bz_stream bzs; | 
|  | 113 | int running; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 114 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 115 | PyThread_type_lock lock; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 116 | #endif | 
|  | 117 | } BZ2CompObject; | 
|  | 118 |  | 
|  | 119 | typedef struct { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 120 | PyObject_HEAD | 
|  | 121 | bz_stream bzs; | 
|  | 122 | int running; | 
|  | 123 | PyObject *unused_data; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 124 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 125 | PyThread_type_lock lock; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 126 | #endif | 
|  | 127 | } BZ2DecompObject; | 
|  | 128 |  | 
|  | 129 | /* ===================================================================== */ | 
|  | 130 | /* Utility functions. */ | 
|  | 131 |  | 
| Antoine Pitrou | 3970301 | 2010-08-01 20:13:11 +0000 | [diff] [blame] | 132 | /* Refuse regular I/O if there's data in the iteration-buffer. | 
|  | 133 | * Mixing them would cause data to arrive out of order, as the read* | 
|  | 134 | * methods don't use the iteration buffer. */ | 
|  | 135 | static int | 
|  | 136 | check_iterbuffered(BZ2FileObject *f) | 
|  | 137 | { | 
|  | 138 | if (f->f_buf != NULL && | 
|  | 139 | (f->f_bufend - f->f_bufptr) > 0 && | 
|  | 140 | f->f_buf[0] != '\0') { | 
|  | 141 | PyErr_SetString(PyExc_ValueError, | 
|  | 142 | "Mixing iteration and read methods would lose data"); | 
|  | 143 | return -1; | 
|  | 144 | } | 
|  | 145 | return 0; | 
|  | 146 | } | 
|  | 147 |  | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 148 | static int | 
|  | 149 | Util_CatchBZ2Error(int bzerror) | 
|  | 150 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 151 | int ret = 0; | 
|  | 152 | switch(bzerror) { | 
|  | 153 | case BZ_OK: | 
|  | 154 | case BZ_STREAM_END: | 
|  | 155 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 156 |  | 
| Gustavo Niemeyer | 7628f1f | 2003-04-27 06:25:24 +0000 | [diff] [blame] | 157 | #ifdef BZ_CONFIG_ERROR | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 158 | case BZ_CONFIG_ERROR: | 
|  | 159 | PyErr_SetString(PyExc_SystemError, | 
|  | 160 | "the bz2 library was not compiled " | 
|  | 161 | "correctly"); | 
|  | 162 | ret = 1; | 
|  | 163 | break; | 
| Gustavo Niemeyer | 7628f1f | 2003-04-27 06:25:24 +0000 | [diff] [blame] | 164 | #endif | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 165 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 166 | case BZ_PARAM_ERROR: | 
|  | 167 | PyErr_SetString(PyExc_ValueError, | 
|  | 168 | "the bz2 library has received wrong " | 
|  | 169 | "parameters"); | 
|  | 170 | ret = 1; | 
|  | 171 | break; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 172 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 173 | case BZ_MEM_ERROR: | 
|  | 174 | PyErr_NoMemory(); | 
|  | 175 | ret = 1; | 
|  | 176 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 177 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 178 | case BZ_DATA_ERROR: | 
|  | 179 | case BZ_DATA_ERROR_MAGIC: | 
|  | 180 | PyErr_SetString(PyExc_IOError, "invalid data stream"); | 
|  | 181 | ret = 1; | 
|  | 182 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 183 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 184 | case BZ_IO_ERROR: | 
|  | 185 | PyErr_SetString(PyExc_IOError, "unknown IO error"); | 
|  | 186 | ret = 1; | 
|  | 187 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 188 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 189 | case BZ_UNEXPECTED_EOF: | 
|  | 190 | PyErr_SetString(PyExc_EOFError, | 
|  | 191 | "compressed file ended before the " | 
|  | 192 | "logical end-of-stream was detected"); | 
|  | 193 | ret = 1; | 
|  | 194 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 195 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 196 | case BZ_SEQUENCE_ERROR: | 
|  | 197 | PyErr_SetString(PyExc_RuntimeError, | 
|  | 198 | "wrong sequence of bz2 library " | 
|  | 199 | "commands used"); | 
|  | 200 | ret = 1; | 
|  | 201 | break; | 
|  | 202 | } | 
|  | 203 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 204 | } | 
|  | 205 |  | 
/* SMALLCHUNK: BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
|  | 211 |  | 
/* Next buffer size to use when growing from currentsize.
 * (A hacked version of Python's fileobject.c:new_buffersize().)
 *
 * The buffer grows by one eighth of its current size plus six bytes.
 * Growing proportionally keeps repeated growth amortized linear, and a
 * factor well below two avoids excessive allocation.  The arithmetic is
 * plain size_t arithmetic, so it wraps around on overflow; callers that
 * care must check that the result is larger than currentsize.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t headroom = (currentsize / 8) + 6;

    return currentsize + headroom;
}
|  | 221 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 222 | static int | 
|  | 223 | Util_GrowBuffer(PyObject **buf) | 
|  | 224 | { | 
|  | 225 | size_t size = PyString_GET_SIZE(*buf); | 
|  | 226 | size_t new_size = Util_NewBufferSize(size); | 
|  | 227 | if (new_size > size) { | 
|  | 228 | return _PyString_Resize(buf, new_size); | 
|  | 229 | } else {  /* overflow */ | 
|  | 230 | PyErr_SetString(PyExc_OverflowError, | 
|  | 231 | "Unable to allocate buffer - output too large"); | 
|  | 232 | return -1; | 
|  | 233 | } | 
|  | 234 | } | 
|  | 235 |  | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 236 | /* This is a hacked version of Python's fileobject.c:get_line(). */ | 
|  | 237 | static PyObject * | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 238 | Util_GetLine(BZ2FileObject *f, int n) | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 239 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 240 | char c; | 
|  | 241 | char *buf, *end; | 
|  | 242 | size_t total_v_size;        /* total # of slots in buffer */ | 
|  | 243 | size_t used_v_size;         /* # used slots in buffer */ | 
|  | 244 | size_t increment;       /* amount to increment the buffer */ | 
|  | 245 | PyObject *v; | 
|  | 246 | int bzerror; | 
|  | 247 | int bytes_read; | 
|  | 248 | int newlinetypes = f->f_newlinetypes; | 
|  | 249 | int skipnextlf = f->f_skipnextlf; | 
|  | 250 | int univ_newline = f->f_univ_newline; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 251 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 252 | total_v_size = n > 0 ? n : 100; | 
|  | 253 | v = PyString_FromStringAndSize((char *)NULL, total_v_size); | 
|  | 254 | if (v == NULL) | 
|  | 255 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 256 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 257 | buf = BUF(v); | 
|  | 258 | end = buf + total_v_size; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 259 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 260 | for (;;) { | 
|  | 261 | Py_BEGIN_ALLOW_THREADS | 
|  | 262 | while (buf != end) { | 
|  | 263 | bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); | 
|  | 264 | f->pos++; | 
|  | 265 | if (bytes_read == 0) break; | 
|  | 266 | if (univ_newline) { | 
|  | 267 | if (skipnextlf) { | 
|  | 268 | skipnextlf = 0; | 
|  | 269 | if (c == '\n') { | 
|  | 270 | /* Seeing a \n here with skipnextlf true means we | 
|  | 271 | * saw a \r before. | 
|  | 272 | */ | 
|  | 273 | newlinetypes |= NEWLINE_CRLF; | 
|  | 274 | if (bzerror != BZ_OK) break; | 
|  | 275 | bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); | 
|  | 276 | f->pos++; | 
|  | 277 | if (bytes_read == 0) break; | 
|  | 278 | } else { | 
|  | 279 | newlinetypes |= NEWLINE_CR; | 
|  | 280 | } | 
|  | 281 | } | 
|  | 282 | if (c == '\r') { | 
|  | 283 | skipnextlf = 1; | 
|  | 284 | c = '\n'; | 
|  | 285 | } else if (c == '\n') | 
|  | 286 | newlinetypes |= NEWLINE_LF; | 
|  | 287 | } | 
|  | 288 | *buf++ = c; | 
|  | 289 | if (bzerror != BZ_OK || c == '\n') break; | 
|  | 290 | } | 
|  | 291 | if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf) | 
|  | 292 | newlinetypes |= NEWLINE_CR; | 
|  | 293 | Py_END_ALLOW_THREADS | 
|  | 294 | f->f_newlinetypes = newlinetypes; | 
|  | 295 | f->f_skipnextlf = skipnextlf; | 
|  | 296 | if (bzerror == BZ_STREAM_END) { | 
|  | 297 | f->size = f->pos; | 
|  | 298 | f->mode = MODE_READ_EOF; | 
|  | 299 | break; | 
|  | 300 | } else if (bzerror != BZ_OK) { | 
|  | 301 | Util_CatchBZ2Error(bzerror); | 
|  | 302 | Py_DECREF(v); | 
|  | 303 | return NULL; | 
|  | 304 | } | 
|  | 305 | if (c == '\n') | 
|  | 306 | break; | 
|  | 307 | /* Must be because buf == end */ | 
|  | 308 | if (n > 0) | 
|  | 309 | break; | 
|  | 310 | used_v_size = total_v_size; | 
|  | 311 | increment = total_v_size >> 2; /* mild exponential growth */ | 
|  | 312 | total_v_size += increment; | 
|  | 313 | if (total_v_size > INT_MAX) { | 
|  | 314 | PyErr_SetString(PyExc_OverflowError, | 
|  | 315 | "line is longer than a Python string can hold"); | 
|  | 316 | Py_DECREF(v); | 
|  | 317 | return NULL; | 
|  | 318 | } | 
|  | 319 | if (_PyString_Resize(&v, total_v_size) < 0) | 
|  | 320 | return NULL; | 
|  | 321 | buf = BUF(v) + used_v_size; | 
|  | 322 | end = BUF(v) + total_v_size; | 
|  | 323 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 324 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 325 | used_v_size = buf - BUF(v); | 
|  | 326 | if (used_v_size != total_v_size) | 
|  | 327 | _PyString_Resize(&v, used_v_size); | 
|  | 328 | return v; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 329 | } | 
|  | 330 |  | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 331 | /* This is a hacked version of Python's | 
|  | 332 | * fileobject.c:Py_UniversalNewlineFread(). */ | 
|  | 333 | size_t | 
|  | 334 | Util_UnivNewlineRead(int *bzerror, BZFILE *stream, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 335 | char* buf, size_t n, BZ2FileObject *f) | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 336 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 337 | char *dst = buf; | 
|  | 338 | int newlinetypes, skipnextlf; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 339 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 340 | assert(buf != NULL); | 
|  | 341 | assert(stream != NULL); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 342 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 343 | if (!f->f_univ_newline) | 
|  | 344 | return BZ2_bzRead(bzerror, stream, buf, n); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 345 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 346 | newlinetypes = f->f_newlinetypes; | 
|  | 347 | skipnextlf = f->f_skipnextlf; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 348 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 349 | /* Invariant:  n is the number of bytes remaining to be filled | 
|  | 350 | * in the buffer. | 
|  | 351 | */ | 
|  | 352 | while (n) { | 
|  | 353 | size_t nread; | 
|  | 354 | int shortread; | 
|  | 355 | char *src = dst; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 356 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 357 | nread = BZ2_bzRead(bzerror, stream, dst, n); | 
|  | 358 | assert(nread <= n); | 
|  | 359 | n -= nread; /* assuming 1 byte out for each in; will adjust */ | 
|  | 360 | shortread = n != 0;             /* true iff EOF or error */ | 
|  | 361 | while (nread--) { | 
|  | 362 | char c = *src++; | 
|  | 363 | if (c == '\r') { | 
|  | 364 | /* Save as LF and set flag to skip next LF. */ | 
|  | 365 | *dst++ = '\n'; | 
|  | 366 | skipnextlf = 1; | 
|  | 367 | } | 
|  | 368 | else if (skipnextlf && c == '\n') { | 
|  | 369 | /* Skip LF, and remember we saw CR LF. */ | 
|  | 370 | skipnextlf = 0; | 
|  | 371 | newlinetypes |= NEWLINE_CRLF; | 
|  | 372 | ++n; | 
|  | 373 | } | 
|  | 374 | else { | 
|  | 375 | /* Normal char to be stored in buffer.  Also | 
|  | 376 | * update the newlinetypes flag if either this | 
|  | 377 | * is an LF or the previous char was a CR. | 
|  | 378 | */ | 
|  | 379 | if (c == '\n') | 
|  | 380 | newlinetypes |= NEWLINE_LF; | 
|  | 381 | else if (skipnextlf) | 
|  | 382 | newlinetypes |= NEWLINE_CR; | 
|  | 383 | *dst++ = c; | 
|  | 384 | skipnextlf = 0; | 
|  | 385 | } | 
|  | 386 | } | 
|  | 387 | if (shortread) { | 
|  | 388 | /* If this is EOF, update type flags. */ | 
|  | 389 | if (skipnextlf && *bzerror == BZ_STREAM_END) | 
|  | 390 | newlinetypes |= NEWLINE_CR; | 
|  | 391 | break; | 
|  | 392 | } | 
|  | 393 | } | 
|  | 394 | f->f_newlinetypes = newlinetypes; | 
|  | 395 | f->f_skipnextlf = skipnextlf; | 
|  | 396 | return dst - buf; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 397 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 398 |  | 
|  | 399 | /* This is a hacked version of Python's fileobject.c:drop_readahead(). */ | 
|  | 400 | static void | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 401 | Util_DropReadAhead(BZ2FileObject *f) | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 402 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 403 | if (f->f_buf != NULL) { | 
|  | 404 | PyMem_Free(f->f_buf); | 
|  | 405 | f->f_buf = NULL; | 
|  | 406 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 407 | } | 
|  | 408 |  | 
|  | 409 | /* This is a hacked version of Python's fileobject.c:readahead(). */ | 
|  | 410 | static int | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 411 | Util_ReadAhead(BZ2FileObject *f, int bufsize) | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 412 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 413 | int chunksize; | 
|  | 414 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 415 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 416 | if (f->f_buf != NULL) { | 
|  | 417 | if((f->f_bufend - f->f_bufptr) >= 1) | 
|  | 418 | return 0; | 
|  | 419 | else | 
|  | 420 | Util_DropReadAhead(f); | 
|  | 421 | } | 
|  | 422 | if (f->mode == MODE_READ_EOF) { | 
|  | 423 | f->f_bufptr = f->f_buf; | 
|  | 424 | f->f_bufend = f->f_buf; | 
|  | 425 | return 0; | 
|  | 426 | } | 
|  | 427 | if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { | 
|  | 428 | PyErr_NoMemory(); | 
|  | 429 | return -1; | 
|  | 430 | } | 
|  | 431 | Py_BEGIN_ALLOW_THREADS | 
|  | 432 | chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf, | 
|  | 433 | bufsize, f); | 
|  | 434 | Py_END_ALLOW_THREADS | 
|  | 435 | f->pos += chunksize; | 
|  | 436 | if (bzerror == BZ_STREAM_END) { | 
|  | 437 | f->size = f->pos; | 
|  | 438 | f->mode = MODE_READ_EOF; | 
|  | 439 | } else if (bzerror != BZ_OK) { | 
|  | 440 | Util_CatchBZ2Error(bzerror); | 
|  | 441 | Util_DropReadAhead(f); | 
|  | 442 | return -1; | 
|  | 443 | } | 
|  | 444 | f->f_bufptr = f->f_buf; | 
|  | 445 | f->f_bufend = f->f_buf + chunksize; | 
|  | 446 | return 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 447 | } | 
|  | 448 |  | 
|  | 449 | /* This is a hacked version of Python's | 
|  | 450 | * fileobject.c:readahead_get_line_skip(). */ | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 451 | static PyStringObject * | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 452 | Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize) | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 453 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 454 | PyStringObject* s; | 
|  | 455 | char *bufptr; | 
|  | 456 | char *buf; | 
|  | 457 | int len; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 458 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 459 | if (f->f_buf == NULL) | 
|  | 460 | if (Util_ReadAhead(f, bufsize) < 0) | 
|  | 461 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 462 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 463 | len = f->f_bufend - f->f_bufptr; | 
|  | 464 | if (len == 0) | 
|  | 465 | return (PyStringObject *) | 
|  | 466 | PyString_FromStringAndSize(NULL, skip); | 
|  | 467 | bufptr = memchr(f->f_bufptr, '\n', len); | 
|  | 468 | if (bufptr != NULL) { | 
|  | 469 | bufptr++;                               /* Count the '\n' */ | 
|  | 470 | len = bufptr - f->f_bufptr; | 
|  | 471 | s = (PyStringObject *) | 
|  | 472 | PyString_FromStringAndSize(NULL, skip+len); | 
|  | 473 | if (s == NULL) | 
|  | 474 | return NULL; | 
|  | 475 | memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); | 
|  | 476 | f->f_bufptr = bufptr; | 
|  | 477 | if (bufptr == f->f_bufend) | 
|  | 478 | Util_DropReadAhead(f); | 
|  | 479 | } else { | 
|  | 480 | bufptr = f->f_bufptr; | 
|  | 481 | buf = f->f_buf; | 
|  | 482 | f->f_buf = NULL;                /* Force new readahead buffer */ | 
|  | 483 | s = Util_ReadAheadGetLineSkip(f, skip+len, | 
|  | 484 | bufsize + (bufsize>>2)); | 
|  | 485 | if (s == NULL) { | 
|  | 486 | PyMem_Free(buf); | 
|  | 487 | return NULL; | 
|  | 488 | } | 
|  | 489 | memcpy(PyString_AS_STRING(s)+skip, bufptr, len); | 
|  | 490 | PyMem_Free(buf); | 
|  | 491 | } | 
|  | 492 | return s; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 493 | } | 
|  | 494 |  | 
|  | 495 | /* ===================================================================== */ | 
|  | 496 | /* Methods of BZ2File. */ | 
|  | 497 |  | 
|  | 498 | PyDoc_STRVAR(BZ2File_read__doc__, | 
|  | 499 | "read([size]) -> string\n\ | 
|  | 500 | \n\ | 
|  | 501 | Read at most size uncompressed bytes, returned as a string. If the size\n\ | 
|  | 502 | argument is negative or omitted, read until EOF is reached.\n\ | 
|  | 503 | "); | 
|  | 504 |  | 
|  | 505 | /* This is a hacked version of Python's fileobject.c:file_read(). */ | 
|  | 506 | static PyObject * | 
|  | 507 | BZ2File_read(BZ2FileObject *self, PyObject *args) | 
|  | 508 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 509 | long bytesrequested = -1; | 
|  | 510 | size_t bytesread, buffersize, chunksize; | 
|  | 511 | int bzerror; | 
|  | 512 | PyObject *ret = NULL; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 513 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 514 | if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested)) | 
|  | 515 | return NULL; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 516 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 517 | ACQUIRE_LOCK(self); | 
|  | 518 | switch (self->mode) { | 
|  | 519 | case MODE_READ: | 
|  | 520 | break; | 
|  | 521 | case MODE_READ_EOF: | 
|  | 522 | ret = PyString_FromString(""); | 
|  | 523 | goto cleanup; | 
|  | 524 | case MODE_CLOSED: | 
|  | 525 | PyErr_SetString(PyExc_ValueError, | 
|  | 526 | "I/O operation on closed file"); | 
|  | 527 | goto cleanup; | 
|  | 528 | default: | 
|  | 529 | PyErr_SetString(PyExc_IOError, | 
|  | 530 | "file is not ready for reading"); | 
|  | 531 | goto cleanup; | 
|  | 532 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 533 |  | 
| Antoine Pitrou | 3970301 | 2010-08-01 20:13:11 +0000 | [diff] [blame] | 534 | /* refuse to mix with f.next() */ | 
|  | 535 | if (check_iterbuffered(self)) | 
|  | 536 | goto cleanup; | 
|  | 537 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 538 | if (bytesrequested < 0) | 
|  | 539 | buffersize = Util_NewBufferSize((size_t)0); | 
|  | 540 | else | 
|  | 541 | buffersize = bytesrequested; | 
|  | 542 | if (buffersize > INT_MAX) { | 
|  | 543 | PyErr_SetString(PyExc_OverflowError, | 
|  | 544 | "requested number of bytes is " | 
|  | 545 | "more than a Python string can hold"); | 
|  | 546 | goto cleanup; | 
|  | 547 | } | 
|  | 548 | ret = PyString_FromStringAndSize((char *)NULL, buffersize); | 
|  | 549 | if (ret == NULL) | 
|  | 550 | goto cleanup; | 
|  | 551 | bytesread = 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 552 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 553 | for (;;) { | 
|  | 554 | Py_BEGIN_ALLOW_THREADS | 
|  | 555 | chunksize = Util_UnivNewlineRead(&bzerror, self->fp, | 
|  | 556 | BUF(ret)+bytesread, | 
|  | 557 | buffersize-bytesread, | 
|  | 558 | self); | 
|  | 559 | self->pos += chunksize; | 
|  | 560 | Py_END_ALLOW_THREADS | 
|  | 561 | bytesread += chunksize; | 
|  | 562 | if (bzerror == BZ_STREAM_END) { | 
|  | 563 | self->size = self->pos; | 
|  | 564 | self->mode = MODE_READ_EOF; | 
|  | 565 | break; | 
|  | 566 | } else if (bzerror != BZ_OK) { | 
|  | 567 | Util_CatchBZ2Error(bzerror); | 
|  | 568 | Py_DECREF(ret); | 
|  | 569 | ret = NULL; | 
|  | 570 | goto cleanup; | 
|  | 571 | } | 
|  | 572 | if (bytesrequested < 0) { | 
|  | 573 | buffersize = Util_NewBufferSize(buffersize); | 
|  | 574 | if (_PyString_Resize(&ret, buffersize) < 0) | 
|  | 575 | goto cleanup; | 
|  | 576 | } else { | 
|  | 577 | break; | 
|  | 578 | } | 
|  | 579 | } | 
|  | 580 | if (bytesread != buffersize) | 
|  | 581 | _PyString_Resize(&ret, bytesread); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 582 |  | 
|  | 583 | cleanup: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 584 | RELEASE_LOCK(self); | 
|  | 585 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 586 | } | 
|  | 587 |  | 
|  | 588 | PyDoc_STRVAR(BZ2File_readline__doc__, | 
|  | 589 | "readline([size]) -> string\n\ | 
|  | 590 | \n\ | 
|  | 591 | Return the next line from the file, as a string, retaining newline.\n\ | 
|  | 592 | A non-negative size argument will limit the maximum number of bytes to\n\ | 
|  | 593 | return (an incomplete line may be returned then). Return an empty\n\ | 
|  | 594 | string at EOF.\n\ | 
|  | 595 | "); | 
|  | 596 |  | 
|  | 597 | static PyObject * | 
|  | 598 | BZ2File_readline(BZ2FileObject *self, PyObject *args) | 
|  | 599 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 600 | PyObject *ret = NULL; | 
|  | 601 | int sizehint = -1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 602 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 603 | if (!PyArg_ParseTuple(args, "|i:readline", &sizehint)) | 
|  | 604 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 605 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 606 | ACQUIRE_LOCK(self); | 
|  | 607 | switch (self->mode) { | 
|  | 608 | case MODE_READ: | 
|  | 609 | break; | 
|  | 610 | case MODE_READ_EOF: | 
|  | 611 | ret = PyString_FromString(""); | 
|  | 612 | goto cleanup; | 
|  | 613 | case MODE_CLOSED: | 
|  | 614 | PyErr_SetString(PyExc_ValueError, | 
|  | 615 | "I/O operation on closed file"); | 
|  | 616 | goto cleanup; | 
|  | 617 | default: | 
|  | 618 | PyErr_SetString(PyExc_IOError, | 
|  | 619 | "file is not ready for reading"); | 
|  | 620 | goto cleanup; | 
|  | 621 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 622 |  | 
| Antoine Pitrou | 3970301 | 2010-08-01 20:13:11 +0000 | [diff] [blame] | 623 | /* refuse to mix with f.next() */ | 
|  | 624 | if (check_iterbuffered(self)) | 
|  | 625 | goto cleanup; | 
|  | 626 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 627 | if (sizehint == 0) | 
|  | 628 | ret = PyString_FromString(""); | 
|  | 629 | else | 
|  | 630 | ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 631 |  | 
|  | 632 | cleanup: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 633 | RELEASE_LOCK(self); | 
|  | 634 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 635 | } | 
|  | 636 |  | 
|  | 637 | PyDoc_STRVAR(BZ2File_readlines__doc__, | 
|  | 638 | "readlines([size]) -> list\n\ | 
|  | 639 | \n\ | 
|  | 640 | Call readline() repeatedly and return a list of lines read.\n\ | 
|  | 641 | The optional size argument, if given, is an approximate bound on the\n\ | 
|  | 642 | total number of bytes in the lines returned.\n\ | 
|  | 643 | "); | 
|  | 644 |  | 
|  | 645 | /* This is a hacked version of Python's fileobject.c:file_readlines(). */ | 
|  | 646 | static PyObject * | 
|  | 647 | BZ2File_readlines(BZ2FileObject *self, PyObject *args) | 
|  | 648 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 649 | long sizehint = 0; | 
|  | 650 | PyObject *list = NULL; | 
|  | 651 | PyObject *line; | 
|  | 652 | char small_buffer[SMALLCHUNK]; | 
|  | 653 | char *buffer = small_buffer; | 
|  | 654 | size_t buffersize = SMALLCHUNK; | 
|  | 655 | PyObject *big_buffer = NULL; | 
|  | 656 | size_t nfilled = 0; | 
|  | 657 | size_t nread; | 
|  | 658 | size_t totalread = 0; | 
|  | 659 | char *p, *q, *end; | 
|  | 660 | int err; | 
|  | 661 | int shortread = 0; | 
|  | 662 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 663 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 664 | if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint)) | 
|  | 665 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 666 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 667 | ACQUIRE_LOCK(self); | 
|  | 668 | switch (self->mode) { | 
|  | 669 | case MODE_READ: | 
|  | 670 | break; | 
|  | 671 | case MODE_READ_EOF: | 
|  | 672 | list = PyList_New(0); | 
|  | 673 | goto cleanup; | 
|  | 674 | case MODE_CLOSED: | 
|  | 675 | PyErr_SetString(PyExc_ValueError, | 
|  | 676 | "I/O operation on closed file"); | 
|  | 677 | goto cleanup; | 
|  | 678 | default: | 
|  | 679 | PyErr_SetString(PyExc_IOError, | 
|  | 680 | "file is not ready for reading"); | 
|  | 681 | goto cleanup; | 
|  | 682 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 683 |  | 
| Antoine Pitrou | 3970301 | 2010-08-01 20:13:11 +0000 | [diff] [blame] | 684 | /* refuse to mix with f.next() */ | 
|  | 685 | if (check_iterbuffered(self)) | 
|  | 686 | goto cleanup; | 
|  | 687 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 688 | if ((list = PyList_New(0)) == NULL) | 
|  | 689 | goto cleanup; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 690 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 691 | for (;;) { | 
|  | 692 | Py_BEGIN_ALLOW_THREADS | 
|  | 693 | nread = Util_UnivNewlineRead(&bzerror, self->fp, | 
|  | 694 | buffer+nfilled, | 
|  | 695 | buffersize-nfilled, self); | 
|  | 696 | self->pos += nread; | 
|  | 697 | Py_END_ALLOW_THREADS | 
|  | 698 | if (bzerror == BZ_STREAM_END) { | 
|  | 699 | self->size = self->pos; | 
|  | 700 | self->mode = MODE_READ_EOF; | 
|  | 701 | if (nread == 0) { | 
|  | 702 | sizehint = 0; | 
|  | 703 | break; | 
|  | 704 | } | 
|  | 705 | shortread = 1; | 
|  | 706 | } else if (bzerror != BZ_OK) { | 
|  | 707 | Util_CatchBZ2Error(bzerror); | 
|  | 708 | error: | 
|  | 709 | Py_DECREF(list); | 
|  | 710 | list = NULL; | 
|  | 711 | goto cleanup; | 
|  | 712 | } | 
|  | 713 | totalread += nread; | 
|  | 714 | p = memchr(buffer+nfilled, '\n', nread); | 
|  | 715 | if (!shortread && p == NULL) { | 
|  | 716 | /* Need a larger buffer to fit this line */ | 
|  | 717 | nfilled += nread; | 
|  | 718 | buffersize *= 2; | 
|  | 719 | if (buffersize > INT_MAX) { | 
|  | 720 | PyErr_SetString(PyExc_OverflowError, | 
|  | 721 | "line is longer than a Python string can hold"); | 
|  | 722 | goto error; | 
|  | 723 | } | 
|  | 724 | if (big_buffer == NULL) { | 
|  | 725 | /* Create the big buffer */ | 
|  | 726 | big_buffer = PyString_FromStringAndSize( | 
|  | 727 | NULL, buffersize); | 
|  | 728 | if (big_buffer == NULL) | 
|  | 729 | goto error; | 
|  | 730 | buffer = PyString_AS_STRING(big_buffer); | 
|  | 731 | memcpy(buffer, small_buffer, nfilled); | 
|  | 732 | } | 
|  | 733 | else { | 
|  | 734 | /* Grow the big buffer */ | 
| Kristján Valur Jónsson | be580f2 | 2014-04-25 09:51:21 +0000 | [diff] [blame] | 735 | if (_PyString_Resize(&big_buffer, buffersize)) | 
|  | 736 | goto error; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 737 | buffer = PyString_AS_STRING(big_buffer); | 
|  | 738 | } | 
|  | 739 | continue; | 
|  | 740 | } | 
|  | 741 | end = buffer+nfilled+nread; | 
|  | 742 | q = buffer; | 
|  | 743 | while (p != NULL) { | 
|  | 744 | /* Process complete lines */ | 
|  | 745 | p++; | 
|  | 746 | line = PyString_FromStringAndSize(q, p-q); | 
|  | 747 | if (line == NULL) | 
|  | 748 | goto error; | 
|  | 749 | err = PyList_Append(list, line); | 
|  | 750 | Py_DECREF(line); | 
|  | 751 | if (err != 0) | 
|  | 752 | goto error; | 
|  | 753 | q = p; | 
|  | 754 | p = memchr(q, '\n', end-q); | 
|  | 755 | } | 
|  | 756 | /* Move the remaining incomplete line to the start */ | 
|  | 757 | nfilled = end-q; | 
|  | 758 | memmove(buffer, q, nfilled); | 
|  | 759 | if (sizehint > 0) | 
|  | 760 | if (totalread >= (size_t)sizehint) | 
|  | 761 | break; | 
|  | 762 | if (shortread) { | 
|  | 763 | sizehint = 0; | 
|  | 764 | break; | 
|  | 765 | } | 
|  | 766 | } | 
|  | 767 | if (nfilled != 0) { | 
|  | 768 | /* Partial last line */ | 
|  | 769 | line = PyString_FromStringAndSize(buffer, nfilled); | 
|  | 770 | if (line == NULL) | 
|  | 771 | goto error; | 
|  | 772 | if (sizehint > 0) { | 
|  | 773 | /* Need to complete the last line */ | 
|  | 774 | PyObject *rest = Util_GetLine(self, 0); | 
|  | 775 | if (rest == NULL) { | 
|  | 776 | Py_DECREF(line); | 
|  | 777 | goto error; | 
|  | 778 | } | 
|  | 779 | PyString_Concat(&line, rest); | 
|  | 780 | Py_DECREF(rest); | 
|  | 781 | if (line == NULL) | 
|  | 782 | goto error; | 
|  | 783 | } | 
|  | 784 | err = PyList_Append(list, line); | 
|  | 785 | Py_DECREF(line); | 
|  | 786 | if (err != 0) | 
|  | 787 | goto error; | 
|  | 788 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 789 |  | 
|  | 790 | cleanup: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 791 | RELEASE_LOCK(self); | 
|  | 792 | if (big_buffer) { | 
|  | 793 | Py_DECREF(big_buffer); | 
|  | 794 | } | 
|  | 795 | return list; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 796 | } | 
|  | 797 |  | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 798 | PyDoc_STRVAR(BZ2File_xreadlines__doc__, | 
|  | 799 | "xreadlines() -> self\n\ | 
|  | 800 | \n\ | 
|  | 801 | For backward compatibility. BZ2File objects now include the performance\n\ | 
|  | 802 | optimizations previously implemented in the xreadlines module.\n\ | 
|  | 803 | "); | 
|  | 804 |  | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 805 | PyDoc_STRVAR(BZ2File_write__doc__, | 
|  | 806 | "write(data) -> None\n\ | 
|  | 807 | \n\ | 
|  | 808 | Write the 'data' string to file. Note that due to buffering, close() may\n\ | 
|  | 809 | be needed before the file on disk reflects the data written.\n\ | 
|  | 810 | "); | 
|  | 811 |  | 
|  | 812 | /* This is a hacked version of Python's fileobject.c:file_write(). */ | 
|  | 813 | static PyObject * | 
|  | 814 | BZ2File_write(BZ2FileObject *self, PyObject *args) | 
|  | 815 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 816 | PyObject *ret = NULL; | 
|  | 817 | Py_buffer pbuf; | 
|  | 818 | char *buf; | 
|  | 819 | int len; | 
|  | 820 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 821 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 822 | if (!PyArg_ParseTuple(args, "s*:write", &pbuf)) | 
|  | 823 | return NULL; | 
|  | 824 | buf = pbuf.buf; | 
|  | 825 | len = pbuf.len; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 826 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 827 | ACQUIRE_LOCK(self); | 
|  | 828 | switch (self->mode) { | 
|  | 829 | case MODE_WRITE: | 
|  | 830 | break; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 831 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 832 | case MODE_CLOSED: | 
|  | 833 | PyErr_SetString(PyExc_ValueError, | 
|  | 834 | "I/O operation on closed file"); | 
|  | 835 | goto cleanup; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 836 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 837 | default: | 
|  | 838 | PyErr_SetString(PyExc_IOError, | 
|  | 839 | "file is not ready for writing"); | 
|  | 840 | goto cleanup; | 
|  | 841 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 842 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 843 | self->f_softspace = 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 844 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 845 | Py_BEGIN_ALLOW_THREADS | 
|  | 846 | BZ2_bzWrite (&bzerror, self->fp, buf, len); | 
|  | 847 | self->pos += len; | 
|  | 848 | Py_END_ALLOW_THREADS | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 849 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 850 | if (bzerror != BZ_OK) { | 
|  | 851 | Util_CatchBZ2Error(bzerror); | 
|  | 852 | goto cleanup; | 
|  | 853 | } | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 854 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 855 | Py_INCREF(Py_None); | 
|  | 856 | ret = Py_None; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 857 |  | 
|  | 858 | cleanup: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 859 | PyBuffer_Release(&pbuf); | 
|  | 860 | RELEASE_LOCK(self); | 
|  | 861 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 862 | } | 
|  | 863 |  | 
|  | 864 | PyDoc_STRVAR(BZ2File_writelines__doc__, | 
|  | 865 | "writelines(sequence_of_strings) -> None\n\ | 
|  | 866 | \n\ | 
|  | 867 | Write the sequence of strings to the file. Note that newlines are not\n\ | 
|  | 868 | added. The sequence can be any iterable object producing strings. This is\n\ | 
|  | 869 | equivalent to calling write() for each string.\n\ | 
|  | 870 | "); | 
|  | 871 |  | 
|  | 872 | /* This is a hacked version of Python's fileobject.c:file_writelines(). */ | 
|  | 873 | static PyObject * | 
|  | 874 | BZ2File_writelines(BZ2FileObject *self, PyObject *seq) | 
|  | 875 | { | 
|  | 876 | #define CHUNKSIZE 1000 | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 877 | PyObject *list = NULL; | 
|  | 878 | PyObject *iter = NULL; | 
|  | 879 | PyObject *ret = NULL; | 
|  | 880 | PyObject *line; | 
|  | 881 | int i, j, index, len, islist; | 
|  | 882 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 883 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 884 | ACQUIRE_LOCK(self); | 
|  | 885 | switch (self->mode) { | 
|  | 886 | case MODE_WRITE: | 
|  | 887 | break; | 
| Georg Brandl | 3335a7a | 2006-08-14 21:42:55 +0000 | [diff] [blame] | 888 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 889 | case MODE_CLOSED: | 
|  | 890 | PyErr_SetString(PyExc_ValueError, | 
|  | 891 | "I/O operation on closed file"); | 
|  | 892 | goto error; | 
| Georg Brandl | 3335a7a | 2006-08-14 21:42:55 +0000 | [diff] [blame] | 893 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 894 | default: | 
|  | 895 | PyErr_SetString(PyExc_IOError, | 
|  | 896 | "file is not ready for writing"); | 
|  | 897 | goto error; | 
|  | 898 | } | 
| Georg Brandl | 3335a7a | 2006-08-14 21:42:55 +0000 | [diff] [blame] | 899 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 900 | islist = PyList_Check(seq); | 
|  | 901 | if  (!islist) { | 
|  | 902 | iter = PyObject_GetIter(seq); | 
|  | 903 | if (iter == NULL) { | 
|  | 904 | PyErr_SetString(PyExc_TypeError, | 
|  | 905 | "writelines() requires an iterable argument"); | 
|  | 906 | goto error; | 
|  | 907 | } | 
|  | 908 | list = PyList_New(CHUNKSIZE); | 
|  | 909 | if (list == NULL) | 
|  | 910 | goto error; | 
|  | 911 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 912 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 913 | /* Strategy: slurp CHUNKSIZE lines into a private list, | 
|  | 914 | checking that they are all strings, then write that list | 
|  | 915 | without holding the interpreter lock, then come back for more. */ | 
|  | 916 | for (index = 0; ; index += CHUNKSIZE) { | 
|  | 917 | if (islist) { | 
|  | 918 | Py_XDECREF(list); | 
|  | 919 | list = PyList_GetSlice(seq, index, index+CHUNKSIZE); | 
|  | 920 | if (list == NULL) | 
|  | 921 | goto error; | 
|  | 922 | j = PyList_GET_SIZE(list); | 
|  | 923 | } | 
|  | 924 | else { | 
|  | 925 | for (j = 0; j < CHUNKSIZE; j++) { | 
|  | 926 | line = PyIter_Next(iter); | 
|  | 927 | if (line == NULL) { | 
|  | 928 | if (PyErr_Occurred()) | 
|  | 929 | goto error; | 
|  | 930 | break; | 
|  | 931 | } | 
|  | 932 | PyList_SetItem(list, j, line); | 
|  | 933 | } | 
|  | 934 | } | 
|  | 935 | if (j == 0) | 
|  | 936 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 937 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 938 | /* Check that all entries are indeed strings. If not, | 
|  | 939 | apply the same rules as for file.write() and | 
|  | 940 | convert the rets to strings. This is slow, but | 
|  | 941 | seems to be the only way since all conversion APIs | 
|  | 942 | could potentially execute Python code. */ | 
|  | 943 | for (i = 0; i < j; i++) { | 
|  | 944 | PyObject *v = PyList_GET_ITEM(list, i); | 
|  | 945 | if (!PyString_Check(v)) { | 
|  | 946 | const char *buffer; | 
|  | 947 | Py_ssize_t len; | 
|  | 948 | if (PyObject_AsCharBuffer(v, &buffer, &len)) { | 
|  | 949 | PyErr_SetString(PyExc_TypeError, | 
|  | 950 | "writelines() " | 
|  | 951 | "argument must be " | 
|  | 952 | "a sequence of " | 
|  | 953 | "strings"); | 
|  | 954 | goto error; | 
|  | 955 | } | 
|  | 956 | line = PyString_FromStringAndSize(buffer, | 
|  | 957 | len); | 
|  | 958 | if (line == NULL) | 
|  | 959 | goto error; | 
|  | 960 | Py_DECREF(v); | 
|  | 961 | PyList_SET_ITEM(list, i, line); | 
|  | 962 | } | 
|  | 963 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 964 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 965 | self->f_softspace = 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 966 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 967 | /* Since we are releasing the global lock, the | 
|  | 968 | following code may *not* execute Python code. */ | 
|  | 969 | Py_BEGIN_ALLOW_THREADS | 
|  | 970 | for (i = 0; i < j; i++) { | 
|  | 971 | line = PyList_GET_ITEM(list, i); | 
|  | 972 | len = PyString_GET_SIZE(line); | 
|  | 973 | BZ2_bzWrite (&bzerror, self->fp, | 
|  | 974 | PyString_AS_STRING(line), len); | 
|  | 975 | if (bzerror != BZ_OK) { | 
|  | 976 | Py_BLOCK_THREADS | 
|  | 977 | Util_CatchBZ2Error(bzerror); | 
|  | 978 | goto error; | 
|  | 979 | } | 
|  | 980 | } | 
|  | 981 | Py_END_ALLOW_THREADS | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 982 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 983 | if (j < CHUNKSIZE) | 
|  | 984 | break; | 
|  | 985 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 986 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 987 | Py_INCREF(Py_None); | 
|  | 988 | ret = Py_None; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 989 |  | 
|  | 990 | error: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 991 | RELEASE_LOCK(self); | 
|  | 992 | Py_XDECREF(list); | 
|  | 993 | Py_XDECREF(iter); | 
|  | 994 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 995 | #undef CHUNKSIZE | 
|  | 996 | } | 
|  | 997 |  | 
|  | 998 | PyDoc_STRVAR(BZ2File_seek__doc__, | 
|  | 999 | "seek(offset [, whence]) -> None\n\ | 
|  | 1000 | \n\ | 
|  | 1001 | Move to new file position. Argument offset is a byte count. Optional\n\ | 
|  | 1002 | argument whence defaults to 0 (offset from start of file, offset\n\ | 
|  | 1003 | should be >= 0); other values are 1 (move relative to current position,\n\ | 
|  | 1004 | positive or negative), and 2 (move relative to end of file, usually\n\ | 
|  | 1005 | negative, although many platforms allow seeking beyond the end of a file).\n\ | 
|  | 1006 | \n\ | 
|  | 1007 | Note that seeking of bz2 files is emulated, and depending on the parameters\n\ | 
|  | 1008 | the operation may be extremely slow.\n\ | 
|  | 1009 | "); | 
|  | 1010 |  | 
|  | 1011 | static PyObject * | 
|  | 1012 | BZ2File_seek(BZ2FileObject *self, PyObject *args) | 
|  | 1013 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1014 | int where = 0; | 
|  | 1015 | PyObject *offobj; | 
|  | 1016 | Py_off_t offset; | 
|  | 1017 | char small_buffer[SMALLCHUNK]; | 
|  | 1018 | char *buffer = small_buffer; | 
|  | 1019 | size_t buffersize = SMALLCHUNK; | 
|  | 1020 | Py_off_t bytesread = 0; | 
|  | 1021 | size_t readsize; | 
|  | 1022 | int chunksize; | 
|  | 1023 | int bzerror; | 
|  | 1024 | PyObject *ret = NULL; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 1025 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1026 | if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where)) | 
|  | 1027 | return NULL; | 
| Georg Brandl | 33a5f2a | 2005-08-21 14:16:04 +0000 | [diff] [blame] | 1028 | #if !defined(HAVE_LARGEFILE_SUPPORT) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1029 | offset = PyInt_AsLong(offobj); | 
| Georg Brandl | 33a5f2a | 2005-08-21 14:16:04 +0000 | [diff] [blame] | 1030 | #else | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1031 | offset = PyLong_Check(offobj) ? | 
|  | 1032 | PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj); | 
| Georg Brandl | 33a5f2a | 2005-08-21 14:16:04 +0000 | [diff] [blame] | 1033 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1034 | if (PyErr_Occurred()) | 
|  | 1035 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1036 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1037 | ACQUIRE_LOCK(self); | 
|  | 1038 | Util_DropReadAhead(self); | 
|  | 1039 | switch (self->mode) { | 
|  | 1040 | case MODE_READ: | 
|  | 1041 | case MODE_READ_EOF: | 
|  | 1042 | break; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 1043 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1044 | case MODE_CLOSED: | 
|  | 1045 | PyErr_SetString(PyExc_ValueError, | 
|  | 1046 | "I/O operation on closed file"); | 
|  | 1047 | goto cleanup; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 1048 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1049 | default: | 
|  | 1050 | PyErr_SetString(PyExc_IOError, | 
|  | 1051 | "seek works only while reading"); | 
|  | 1052 | goto cleanup; | 
|  | 1053 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1054 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1055 | if (where == 2) { | 
|  | 1056 | if (self->size == -1) { | 
|  | 1057 | assert(self->mode != MODE_READ_EOF); | 
|  | 1058 | for (;;) { | 
|  | 1059 | Py_BEGIN_ALLOW_THREADS | 
|  | 1060 | chunksize = Util_UnivNewlineRead( | 
|  | 1061 | &bzerror, self->fp, | 
|  | 1062 | buffer, buffersize, | 
|  | 1063 | self); | 
|  | 1064 | self->pos += chunksize; | 
|  | 1065 | Py_END_ALLOW_THREADS | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1066 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1067 | bytesread += chunksize; | 
|  | 1068 | if (bzerror == BZ_STREAM_END) { | 
|  | 1069 | break; | 
|  | 1070 | } else if (bzerror != BZ_OK) { | 
|  | 1071 | Util_CatchBZ2Error(bzerror); | 
|  | 1072 | goto cleanup; | 
|  | 1073 | } | 
|  | 1074 | } | 
|  | 1075 | self->mode = MODE_READ_EOF; | 
|  | 1076 | self->size = self->pos; | 
|  | 1077 | bytesread = 0; | 
|  | 1078 | } | 
|  | 1079 | offset = self->size + offset; | 
|  | 1080 | } else if (where == 1) { | 
|  | 1081 | offset = self->pos + offset; | 
|  | 1082 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1083 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1084 | /* Before getting here, offset must be the absolute position the file | 
|  | 1085 | * pointer should be set to. */ | 
| Georg Brandl | 47fab92 | 2006-02-18 21:57:25 +0000 | [diff] [blame] | 1086 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1087 | if (offset >= self->pos) { | 
|  | 1088 | /* we can move forward */ | 
|  | 1089 | offset -= self->pos; | 
|  | 1090 | } else { | 
|  | 1091 | /* we cannot move back, so rewind the stream */ | 
|  | 1092 | BZ2_bzReadClose(&bzerror, self->fp); | 
|  | 1093 | if (self->fp) { | 
|  | 1094 | PyFile_DecUseCount((PyFileObject *)self->file); | 
|  | 1095 | self->fp = NULL; | 
|  | 1096 | } | 
|  | 1097 | if (bzerror != BZ_OK) { | 
|  | 1098 | Util_CatchBZ2Error(bzerror); | 
|  | 1099 | goto cleanup; | 
|  | 1100 | } | 
|  | 1101 | ret = PyObject_CallMethod(self->file, "seek", "(i)", 0); | 
|  | 1102 | if (!ret) | 
|  | 1103 | goto cleanup; | 
|  | 1104 | Py_DECREF(ret); | 
|  | 1105 | ret = NULL; | 
|  | 1106 | self->pos = 0; | 
|  | 1107 | self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file), | 
|  | 1108 | 0, 0, NULL, 0); | 
|  | 1109 | if (self->fp) | 
|  | 1110 | PyFile_IncUseCount((PyFileObject *)self->file); | 
|  | 1111 | if (bzerror != BZ_OK) { | 
|  | 1112 | Util_CatchBZ2Error(bzerror); | 
|  | 1113 | goto cleanup; | 
|  | 1114 | } | 
|  | 1115 | self->mode = MODE_READ; | 
|  | 1116 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1117 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1118 | if (offset <= 0 || self->mode == MODE_READ_EOF) | 
|  | 1119 | goto exit; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1120 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1121 | /* Before getting here, offset must be set to the number of bytes | 
|  | 1122 | * to walk forward. */ | 
|  | 1123 | for (;;) { | 
|  | 1124 | if (offset-bytesread > buffersize) | 
|  | 1125 | readsize = buffersize; | 
|  | 1126 | else | 
|  | 1127 | /* offset might be wider that readsize, but the result | 
|  | 1128 | * of the subtraction is bound by buffersize (see the | 
|  | 1129 | * condition above). buffersize is 8192. */ | 
|  | 1130 | readsize = (size_t)(offset-bytesread); | 
|  | 1131 | Py_BEGIN_ALLOW_THREADS | 
|  | 1132 | chunksize = Util_UnivNewlineRead(&bzerror, self->fp, | 
|  | 1133 | buffer, readsize, self); | 
|  | 1134 | self->pos += chunksize; | 
|  | 1135 | Py_END_ALLOW_THREADS | 
|  | 1136 | bytesread += chunksize; | 
|  | 1137 | if (bzerror == BZ_STREAM_END) { | 
|  | 1138 | self->size = self->pos; | 
|  | 1139 | self->mode = MODE_READ_EOF; | 
|  | 1140 | break; | 
|  | 1141 | } else if (bzerror != BZ_OK) { | 
|  | 1142 | Util_CatchBZ2Error(bzerror); | 
|  | 1143 | goto cleanup; | 
|  | 1144 | } | 
|  | 1145 | if (bytesread == offset) | 
|  | 1146 | break; | 
|  | 1147 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1148 |  | 
|  | 1149 | exit: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1150 | Py_INCREF(Py_None); | 
|  | 1151 | ret = Py_None; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1152 |  | 
|  | 1153 | cleanup: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1154 | RELEASE_LOCK(self); | 
|  | 1155 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1156 | } | 
|  | 1157 |  | 
|  | 1158 | PyDoc_STRVAR(BZ2File_tell__doc__, | 
|  | 1159 | "tell() -> int\n\ | 
|  | 1160 | \n\ | 
|  | 1161 | Return the current file position, an integer (may be a long integer).\n\ | 
|  | 1162 | "); | 
|  | 1163 |  | 
|  | 1164 | static PyObject * | 
|  | 1165 | BZ2File_tell(BZ2FileObject *self, PyObject *args) | 
|  | 1166 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1167 | PyObject *ret = NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1168 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1169 | if (self->mode == MODE_CLOSED) { | 
|  | 1170 | PyErr_SetString(PyExc_ValueError, | 
|  | 1171 | "I/O operation on closed file"); | 
|  | 1172 | goto cleanup; | 
|  | 1173 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1174 |  | 
| Georg Brandl | a8bcecc | 2005-09-03 07:49:53 +0000 | [diff] [blame] | 1175 | #if !defined(HAVE_LARGEFILE_SUPPORT) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1176 | ret = PyInt_FromLong(self->pos); | 
| Georg Brandl | a8bcecc | 2005-09-03 07:49:53 +0000 | [diff] [blame] | 1177 | #else | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1178 | ret = PyLong_FromLongLong(self->pos); | 
| Georg Brandl | a8bcecc | 2005-09-03 07:49:53 +0000 | [diff] [blame] | 1179 | #endif | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1180 |  | 
|  | 1181 | cleanup: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1182 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1183 | } | 
|  | 1184 |  | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1185 | PyDoc_STRVAR(BZ2File_close__doc__, | 
|  | 1186 | "close() -> None or (perhaps) an integer\n\ | 
|  | 1187 | \n\ | 
|  | 1188 | Close the file. Sets data attribute .closed to true. A closed file\n\ | 
|  | 1189 | cannot be used for further I/O operations. close() may be called more\n\ | 
|  | 1190 | than once without error.\n\ | 
|  | 1191 | "); | 
|  | 1192 |  | 
|  | 1193 | static PyObject * | 
|  | 1194 | BZ2File_close(BZ2FileObject *self) | 
|  | 1195 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1196 | PyObject *ret = NULL; | 
|  | 1197 | int bzerror = BZ_OK; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1198 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1199 | ACQUIRE_LOCK(self); | 
|  | 1200 | switch (self->mode) { | 
|  | 1201 | case MODE_READ: | 
|  | 1202 | case MODE_READ_EOF: | 
|  | 1203 | BZ2_bzReadClose(&bzerror, self->fp); | 
|  | 1204 | break; | 
|  | 1205 | case MODE_WRITE: | 
|  | 1206 | BZ2_bzWriteClose(&bzerror, self->fp, | 
|  | 1207 | 0, NULL, NULL); | 
|  | 1208 | break; | 
|  | 1209 | } | 
| Nadeem Vawda | 337c50b | 2013-12-08 15:31:50 +0100 | [diff] [blame] | 1210 | if (self->file) { | 
|  | 1211 | if (self->fp) | 
|  | 1212 | PyFile_DecUseCount((PyFileObject *)self->file); | 
|  | 1213 | ret = PyObject_CallMethod(self->file, "close", NULL); | 
|  | 1214 | } else { | 
|  | 1215 | Py_INCREF(Py_None); | 
|  | 1216 | ret = Py_None; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1217 | } | 
| Nadeem Vawda | 337c50b | 2013-12-08 15:31:50 +0100 | [diff] [blame] | 1218 | self->fp = NULL; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1219 | self->mode = MODE_CLOSED; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1220 | if (bzerror != BZ_OK) { | 
|  | 1221 | Util_CatchBZ2Error(bzerror); | 
|  | 1222 | Py_XDECREF(ret); | 
|  | 1223 | ret = NULL; | 
|  | 1224 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1225 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1226 | RELEASE_LOCK(self); | 
|  | 1227 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1228 | } | 
|  | 1229 |  | 
| Antoine Pitrou | b74fc2b | 2009-01-10 16:13:45 +0000 | [diff] [blame] | 1230 | PyDoc_STRVAR(BZ2File_enter_doc, | 
|  | 1231 | "__enter__() -> self."); | 
|  | 1232 |  | 
|  | 1233 | static PyObject * | 
|  | 1234 | BZ2File_enter(BZ2FileObject *self) | 
|  | 1235 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1236 | if (self->mode == MODE_CLOSED) { | 
|  | 1237 | PyErr_SetString(PyExc_ValueError, | 
|  | 1238 | "I/O operation on closed file"); | 
|  | 1239 | return NULL; | 
|  | 1240 | } | 
|  | 1241 | Py_INCREF(self); | 
|  | 1242 | return (PyObject *) self; | 
| Antoine Pitrou | b74fc2b | 2009-01-10 16:13:45 +0000 | [diff] [blame] | 1243 | } | 
|  | 1244 |  | 
|  | 1245 | PyDoc_STRVAR(BZ2File_exit_doc, | 
|  | 1246 | "__exit__(*excinfo) -> None.  Closes the file."); | 
|  | 1247 |  | 
|  | 1248 | static PyObject * | 
|  | 1249 | BZ2File_exit(BZ2FileObject *self, PyObject *args) | 
|  | 1250 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1251 | PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL); | 
|  | 1252 | if (!ret) | 
|  | 1253 | /* If error occurred, pass through */ | 
|  | 1254 | return NULL; | 
|  | 1255 | Py_DECREF(ret); | 
|  | 1256 | Py_RETURN_NONE; | 
| Antoine Pitrou | b74fc2b | 2009-01-10 16:13:45 +0000 | [diff] [blame] | 1257 | } | 
|  | 1258 |  | 
|  | 1259 |  | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1260 | static PyObject *BZ2File_getiter(BZ2FileObject *self); | 
|  | 1261 |  | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1262 | static PyMethodDef BZ2File_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1263 | {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__}, | 
|  | 1264 | {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__}, | 
|  | 1265 | {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__}, | 
|  | 1266 | {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__}, | 
|  | 1267 | {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__}, | 
|  | 1268 | {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__}, | 
|  | 1269 | {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__}, | 
|  | 1270 | {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__}, | 
|  | 1271 | {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__}, | 
|  | 1272 | {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc}, | 
|  | 1273 | {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc}, | 
|  | 1274 | {NULL,              NULL}           /* sentinel */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1275 | }; | 
|  | 1276 |  | 
|  | 1277 |  | 
|  | 1278 | /* ===================================================================== */ | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1279 | /* Getters and setters of BZ2File. */ | 
|  | 1280 |  | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1281 | /* This is a hacked version of Python's fileobject.c:get_newlines(). */ | 
|  | 1282 | static PyObject * | 
|  | 1283 | BZ2File_get_newlines(BZ2FileObject *self, void *closure) | 
|  | 1284 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1285 | switch (self->f_newlinetypes) { | 
|  | 1286 | case NEWLINE_UNKNOWN: | 
|  | 1287 | Py_INCREF(Py_None); | 
|  | 1288 | return Py_None; | 
|  | 1289 | case NEWLINE_CR: | 
|  | 1290 | return PyString_FromString("\r"); | 
|  | 1291 | case NEWLINE_LF: | 
|  | 1292 | return PyString_FromString("\n"); | 
|  | 1293 | case NEWLINE_CR|NEWLINE_LF: | 
|  | 1294 | return Py_BuildValue("(ss)", "\r", "\n"); | 
|  | 1295 | case NEWLINE_CRLF: | 
|  | 1296 | return PyString_FromString("\r\n"); | 
|  | 1297 | case NEWLINE_CR|NEWLINE_CRLF: | 
|  | 1298 | return Py_BuildValue("(ss)", "\r", "\r\n"); | 
|  | 1299 | case NEWLINE_LF|NEWLINE_CRLF: | 
|  | 1300 | return Py_BuildValue("(ss)", "\n", "\r\n"); | 
|  | 1301 | case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF: | 
|  | 1302 | return Py_BuildValue("(sss)", "\r", "\n", "\r\n"); | 
|  | 1303 | default: | 
|  | 1304 | PyErr_Format(PyExc_SystemError, | 
|  | 1305 | "Unknown newlines value 0x%x\n", | 
|  | 1306 | self->f_newlinetypes); | 
|  | 1307 | return NULL; | 
|  | 1308 | } | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1309 | } | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1310 |  | 
|  | 1311 | static PyObject * | 
|  | 1312 | BZ2File_get_closed(BZ2FileObject *self, void *closure) | 
|  | 1313 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1314 | return PyInt_FromLong(self->mode == MODE_CLOSED); | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1315 | } | 
|  | 1316 |  | 
|  | 1317 | static PyObject * | 
|  | 1318 | BZ2File_get_mode(BZ2FileObject *self, void *closure) | 
|  | 1319 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1320 | return PyObject_GetAttrString(self->file, "mode"); | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1321 | } | 
|  | 1322 |  | 
|  | 1323 | static PyObject * | 
|  | 1324 | BZ2File_get_name(BZ2FileObject *self, void *closure) | 
|  | 1325 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1326 | return PyObject_GetAttrString(self->file, "name"); | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1327 | } | 
|  | 1328 |  | 
|  | 1329 | static PyGetSetDef BZ2File_getset[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1330 | {"closed", (getter)BZ2File_get_closed, NULL, | 
|  | 1331 | "True if the file is closed"}, | 
|  | 1332 | {"newlines", (getter)BZ2File_get_newlines, NULL, | 
|  | 1333 | "end-of-line convention used in this file"}, | 
|  | 1334 | {"mode", (getter)BZ2File_get_mode, NULL, | 
|  | 1335 | "file mode ('r', 'w', or 'U')"}, | 
|  | 1336 | {"name", (getter)BZ2File_get_name, NULL, | 
|  | 1337 | "file name"}, | 
|  | 1338 | {NULL}      /* Sentinel */ | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1339 | }; | 
|  | 1340 |  | 
|  | 1341 |  | 
|  | 1342 | /* ===================================================================== */ | 
|  | 1343 | /* Members of BZ2File_Type. */ | 
|  | 1344 |  | 
|  | 1345 | #undef OFF | 
|  | 1346 | #define OFF(x) offsetof(BZ2FileObject, x) | 
|  | 1347 |  | 
|  | 1348 | static PyMemberDef BZ2File_members[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1349 | {"softspace",       T_INT,          OFF(f_softspace), 0, | 
|  | 1350 | "flag indicating that a space needs to be printed; used by print"}, | 
|  | 1351 | {NULL}      /* Sentinel */ | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1352 | }; | 
|  | 1353 |  | 
|  | 1354 | /* ===================================================================== */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1355 | /* Slot definitions for BZ2File_Type. */ | 
|  | 1356 |  | 
|  | 1357 | static int | 
|  | 1358 | BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) | 
|  | 1359 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1360 | static char *kwlist[] = {"filename", "mode", "buffering", | 
|  | 1361 | "compresslevel", 0}; | 
|  | 1362 | PyObject *name; | 
|  | 1363 | char *mode = "r"; | 
|  | 1364 | int buffering = -1; | 
|  | 1365 | int compresslevel = 9; | 
|  | 1366 | int bzerror; | 
|  | 1367 | int mode_char = 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1368 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1369 | self->size = -1; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 1370 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1371 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File", | 
|  | 1372 | kwlist, &name, &mode, &buffering, | 
|  | 1373 | &compresslevel)) | 
|  | 1374 | return -1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1375 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1376 | if (compresslevel < 1 || compresslevel > 9) { | 
|  | 1377 | PyErr_SetString(PyExc_ValueError, | 
|  | 1378 | "compresslevel must be between 1 and 9"); | 
|  | 1379 | return -1; | 
|  | 1380 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1381 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1382 | for (;;) { | 
|  | 1383 | int error = 0; | 
|  | 1384 | switch (*mode) { | 
|  | 1385 | case 'r': | 
|  | 1386 | case 'w': | 
|  | 1387 | if (mode_char) | 
|  | 1388 | error = 1; | 
|  | 1389 | mode_char = *mode; | 
|  | 1390 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1391 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1392 | case 'b': | 
|  | 1393 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1394 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1395 | case 'U': | 
| Neal Norwitz | 2a30cd0 | 2006-07-10 01:18:57 +0000 | [diff] [blame] | 1396 | #ifdef __VMS | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1397 | self->f_univ_newline = 0; | 
| Neal Norwitz | 2a30cd0 | 2006-07-10 01:18:57 +0000 | [diff] [blame] | 1398 | #else | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1399 | self->f_univ_newline = 1; | 
| Neal Norwitz | 2a30cd0 | 2006-07-10 01:18:57 +0000 | [diff] [blame] | 1400 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1401 | break; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1402 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1403 | default: | 
|  | 1404 | error = 1; | 
|  | 1405 | break; | 
|  | 1406 | } | 
|  | 1407 | if (error) { | 
|  | 1408 | PyErr_Format(PyExc_ValueError, | 
|  | 1409 | "invalid mode char %c", *mode); | 
|  | 1410 | return -1; | 
|  | 1411 | } | 
|  | 1412 | mode++; | 
|  | 1413 | if (*mode == '\0') | 
|  | 1414 | break; | 
|  | 1415 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1416 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1417 | if (mode_char == 0) { | 
|  | 1418 | mode_char = 'r'; | 
|  | 1419 | } | 
| Georg Brandl | 6b95f1d | 2005-06-03 19:47:00 +0000 | [diff] [blame] | 1420 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1421 | mode = (mode_char == 'r') ? "rb" : "wb"; | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 1422 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1423 | self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)", | 
|  | 1424 | name, mode, buffering); | 
|  | 1425 | if (self->file == NULL) | 
|  | 1426 | return -1; | 
| Gustavo Niemeyer | 49ea7be | 2002-11-08 14:31:49 +0000 | [diff] [blame] | 1427 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1428 | /* From now on, we have stuff to dealloc, so jump to error label | 
|  | 1429 | * instead of returning */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1430 |  | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1431 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1432 | self->lock = PyThread_allocate_lock(); | 
|  | 1433 | if (!self->lock) { | 
|  | 1434 | PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); | 
|  | 1435 | goto error; | 
|  | 1436 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1437 | #endif | 
|  | 1438 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1439 | if (mode_char == 'r') | 
|  | 1440 | self->fp = BZ2_bzReadOpen(&bzerror, | 
|  | 1441 | PyFile_AsFile(self->file), | 
|  | 1442 | 0, 0, NULL, 0); | 
|  | 1443 | else | 
|  | 1444 | self->fp = BZ2_bzWriteOpen(&bzerror, | 
|  | 1445 | PyFile_AsFile(self->file), | 
|  | 1446 | compresslevel, 0, 0); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1447 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1448 | if (bzerror != BZ_OK) { | 
|  | 1449 | Util_CatchBZ2Error(bzerror); | 
|  | 1450 | goto error; | 
|  | 1451 | } | 
|  | 1452 | PyFile_IncUseCount((PyFileObject *)self->file); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1453 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1454 | self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1455 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1456 | return 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1457 |  | 
|  | 1458 | error: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1459 | Py_CLEAR(self->file); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1460 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1461 | if (self->lock) { | 
|  | 1462 | PyThread_free_lock(self->lock); | 
|  | 1463 | self->lock = NULL; | 
|  | 1464 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1465 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1466 | return -1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1467 | } | 
|  | 1468 |  | 
|  | 1469 | static void | 
|  | 1470 | BZ2File_dealloc(BZ2FileObject *self) | 
|  | 1471 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1472 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1473 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1474 | if (self->lock) | 
|  | 1475 | PyThread_free_lock(self->lock); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1476 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1477 | switch (self->mode) { | 
|  | 1478 | case MODE_READ: | 
|  | 1479 | case MODE_READ_EOF: | 
|  | 1480 | BZ2_bzReadClose(&bzerror, self->fp); | 
|  | 1481 | break; | 
|  | 1482 | case MODE_WRITE: | 
|  | 1483 | BZ2_bzWriteClose(&bzerror, self->fp, | 
|  | 1484 | 0, NULL, NULL); | 
|  | 1485 | break; | 
|  | 1486 | } | 
| Nadeem Vawda | 337c50b | 2013-12-08 15:31:50 +0100 | [diff] [blame] | 1487 | if (self->fp != NULL && self->file != NULL) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1488 | PyFile_DecUseCount((PyFileObject *)self->file); | 
| Nadeem Vawda | 337c50b | 2013-12-08 15:31:50 +0100 | [diff] [blame] | 1489 | self->fp = NULL; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1490 | Util_DropReadAhead(self); | 
|  | 1491 | Py_XDECREF(self->file); | 
|  | 1492 | Py_TYPE(self)->tp_free((PyObject *)self); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1493 | } | 
|  | 1494 |  | 
|  | 1495 | /* This is a hacked version of Python's fileobject.c:file_getiter(). */ | 
|  | 1496 | static PyObject * | 
|  | 1497 | BZ2File_getiter(BZ2FileObject *self) | 
|  | 1498 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1499 | if (self->mode == MODE_CLOSED) { | 
|  | 1500 | PyErr_SetString(PyExc_ValueError, | 
|  | 1501 | "I/O operation on closed file"); | 
|  | 1502 | return NULL; | 
|  | 1503 | } | 
|  | 1504 | Py_INCREF((PyObject*)self); | 
|  | 1505 | return (PyObject *)self; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1506 | } | 
|  | 1507 |  | 
|  | 1508 | /* This is a hacked version of Python's fileobject.c:file_iternext(). */ | 
|  | 1509 | #define READAHEAD_BUFSIZE 8192 | 
|  | 1510 | static PyObject * | 
|  | 1511 | BZ2File_iternext(BZ2FileObject *self) | 
|  | 1512 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1513 | PyStringObject* ret; | 
|  | 1514 | ACQUIRE_LOCK(self); | 
|  | 1515 | if (self->mode == MODE_CLOSED) { | 
|  | 1516 | RELEASE_LOCK(self); | 
|  | 1517 | PyErr_SetString(PyExc_ValueError, | 
|  | 1518 | "I/O operation on closed file"); | 
|  | 1519 | return NULL; | 
|  | 1520 | } | 
|  | 1521 | ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE); | 
|  | 1522 | RELEASE_LOCK(self); | 
|  | 1523 | if (ret == NULL || PyString_GET_SIZE(ret) == 0) { | 
|  | 1524 | Py_XDECREF(ret); | 
|  | 1525 | return NULL; | 
|  | 1526 | } | 
|  | 1527 | return (PyObject *)ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1528 | } | 
|  | 1529 |  | 
|  | 1530 | /* ===================================================================== */ | 
|  | 1531 | /* BZ2File_Type definition. */ | 
|  | 1532 |  | 
|  | 1533 | PyDoc_VAR(BZ2File__doc__) = | 
|  | 1534 | PyDoc_STR( | 
|  | 1535 | "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\ | 
|  | 1536 | \n\ | 
|  | 1537 | Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\ | 
|  | 1538 | writing. When opened for writing, the file will be created if it doesn't\n\ | 
|  | 1539 | exist, and truncated otherwise. If the buffering argument is given, 0 means\n\ | 
|  | 1540 | unbuffered, and larger numbers specify the buffer size. If compresslevel\n\ | 
|  | 1541 | is given, must be a number between 1 and 9.\n\ | 
|  | 1542 | ") | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1543 | PyDoc_STR( | 
|  | 1544 | "\n\ | 
|  | 1545 | Add a 'U' to mode to open the file for input with universal newline\n\ | 
|  | 1546 | support. Any line ending in the input file will be seen as a '\\n' in\n\ | 
|  | 1547 | Python. Also, a file so opened gains the attribute 'newlines'; the value\n\ | 
|  | 1548 | for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\ | 
|  | 1549 | '\\r\\n' or a tuple containing all the newline types seen. Universal\n\ | 
|  | 1550 | newlines are available only when reading.\n\ | 
|  | 1551 | ") | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1552 | ; | 
|  | 1553 |  | 
| Gustavo Niemeyer | 49ea7be | 2002-11-08 14:31:49 +0000 | [diff] [blame] | 1554 | static PyTypeObject BZ2File_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1555 | PyVarObject_HEAD_INIT(NULL, 0) | 
|  | 1556 | "bz2.BZ2File",              /*tp_name*/ | 
|  | 1557 | sizeof(BZ2FileObject),      /*tp_basicsize*/ | 
|  | 1558 | 0,                          /*tp_itemsize*/ | 
|  | 1559 | (destructor)BZ2File_dealloc, /*tp_dealloc*/ | 
|  | 1560 | 0,                          /*tp_print*/ | 
|  | 1561 | 0,                          /*tp_getattr*/ | 
|  | 1562 | 0,                          /*tp_setattr*/ | 
|  | 1563 | 0,                          /*tp_compare*/ | 
|  | 1564 | 0,                          /*tp_repr*/ | 
|  | 1565 | 0,                          /*tp_as_number*/ | 
|  | 1566 | 0,                          /*tp_as_sequence*/ | 
|  | 1567 | 0,                          /*tp_as_mapping*/ | 
|  | 1568 | 0,                          /*tp_hash*/ | 
|  | 1569 | 0,                      /*tp_call*/ | 
|  | 1570 | 0,                      /*tp_str*/ | 
|  | 1571 | PyObject_GenericGetAttr,/*tp_getattro*/ | 
|  | 1572 | PyObject_GenericSetAttr,/*tp_setattro*/ | 
|  | 1573 | 0,                      /*tp_as_buffer*/ | 
|  | 1574 | Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ | 
|  | 1575 | BZ2File__doc__,         /*tp_doc*/ | 
|  | 1576 | 0,                      /*tp_traverse*/ | 
|  | 1577 | 0,                      /*tp_clear*/ | 
|  | 1578 | 0,                      /*tp_richcompare*/ | 
|  | 1579 | 0,                      /*tp_weaklistoffset*/ | 
|  | 1580 | (getiterfunc)BZ2File_getiter, /*tp_iter*/ | 
|  | 1581 | (iternextfunc)BZ2File_iternext, /*tp_iternext*/ | 
|  | 1582 | BZ2File_methods,        /*tp_methods*/ | 
|  | 1583 | BZ2File_members,        /*tp_members*/ | 
|  | 1584 | BZ2File_getset,         /*tp_getset*/ | 
|  | 1585 | 0,                      /*tp_base*/ | 
|  | 1586 | 0,                      /*tp_dict*/ | 
|  | 1587 | 0,                      /*tp_descr_get*/ | 
|  | 1588 | 0,                      /*tp_descr_set*/ | 
|  | 1589 | 0,                      /*tp_dictoffset*/ | 
|  | 1590 | (initproc)BZ2File_init, /*tp_init*/ | 
|  | 1591 | PyType_GenericAlloc,    /*tp_alloc*/ | 
|  | 1592 | PyType_GenericNew,      /*tp_new*/ | 
|  | 1593 | _PyObject_Del,          /*tp_free*/ | 
|  | 1594 | 0,                      /*tp_is_gc*/ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1595 | }; | 
|  | 1596 |  | 
|  | 1597 |  | 
|  | 1598 | /* ===================================================================== */ | 
|  | 1599 | /* Methods of BZ2Comp. */ | 
|  | 1600 |  | 
|  | 1601 | PyDoc_STRVAR(BZ2Comp_compress__doc__, | 
|  | 1602 | "compress(data) -> string\n\ | 
|  | 1603 | \n\ | 
|  | 1604 | Provide more data to the compressor object. It will return chunks of\n\ | 
|  | 1605 | compressed data whenever possible. When you've finished providing data\n\ | 
|  | 1606 | to compress, call the flush() method to finish the compression process,\n\ | 
|  | 1607 | and return what is left in the internal buffers.\n\ | 
|  | 1608 | "); | 
|  | 1609 |  | 
|  | 1610 | static PyObject * | 
|  | 1611 | BZ2Comp_compress(BZ2CompObject *self, PyObject *args) | 
|  | 1612 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1613 | Py_buffer pdata; | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1614 | size_t input_left; | 
|  | 1615 | size_t output_size = 0; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1616 | PyObject *ret = NULL; | 
|  | 1617 | bz_stream *bzs = &self->bzs; | 
|  | 1618 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1619 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1620 | if (!PyArg_ParseTuple(args, "s*:compress", &pdata)) | 
|  | 1621 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1622 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1623 | if (pdata.len == 0) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1624 | PyBuffer_Release(&pdata); | 
|  | 1625 | return PyString_FromString(""); | 
|  | 1626 | } | 
| Gustavo Niemeyer | a6e436e | 2004-02-14 00:02:45 +0000 | [diff] [blame] | 1627 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1628 | ACQUIRE_LOCK(self); | 
|  | 1629 | if (!self->running) { | 
|  | 1630 | PyErr_SetString(PyExc_ValueError, | 
|  | 1631 | "this object was already flushed"); | 
|  | 1632 | goto error; | 
|  | 1633 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1634 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1635 | ret = PyString_FromStringAndSize(NULL, SMALLCHUNK); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1636 | if (!ret) | 
|  | 1637 | goto error; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1638 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1639 | bzs->next_in = pdata.buf; | 
|  | 1640 | bzs->avail_in = MIN(pdata.len, UINT_MAX); | 
|  | 1641 | input_left = pdata.len - bzs->avail_in; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1642 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1643 | bzs->next_out = BUF(ret); | 
|  | 1644 | bzs->avail_out = PyString_GET_SIZE(ret); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1645 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1646 | for (;;) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1647 | char *saved_next_out; | 
|  | 1648 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1649 | Py_BEGIN_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1650 | saved_next_out = bzs->next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1651 | bzerror = BZ2_bzCompress(bzs, BZ_RUN); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1652 | output_size += bzs->next_out - saved_next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1653 | Py_END_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1654 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1655 | if (bzerror != BZ_RUN_OK) { | 
|  | 1656 | Util_CatchBZ2Error(bzerror); | 
|  | 1657 | goto error; | 
|  | 1658 | } | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1659 | if (bzs->avail_in == 0) { | 
|  | 1660 | if (input_left == 0) | 
|  | 1661 | break; /* no more input data */ | 
|  | 1662 | bzs->avail_in = MIN(input_left, UINT_MAX); | 
|  | 1663 | input_left -= bzs->avail_in; | 
|  | 1664 | } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1665 | if (bzs->avail_out == 0) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1666 | size_t buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 1667 | if (buffer_left == 0) { | 
|  | 1668 | if (Util_GrowBuffer(&ret) < 0) { | 
|  | 1669 | BZ2_bzCompressEnd(bzs); | 
|  | 1670 | goto error; | 
|  | 1671 | } | 
|  | 1672 | bzs->next_out = BUF(ret) + output_size; | 
|  | 1673 | buffer_left = PyString_GET_SIZE(ret) - output_size; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1674 | } | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1675 | bzs->avail_out = MIN(buffer_left, UINT_MAX); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1676 | } | 
|  | 1677 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1678 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1679 | if (_PyString_Resize(&ret, output_size) < 0) | 
|  | 1680 | goto error; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1681 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1682 | RELEASE_LOCK(self); | 
|  | 1683 | PyBuffer_Release(&pdata); | 
|  | 1684 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1685 |  | 
|  | 1686 | error: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1687 | RELEASE_LOCK(self); | 
|  | 1688 | PyBuffer_Release(&pdata); | 
|  | 1689 | Py_XDECREF(ret); | 
|  | 1690 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1691 | } | 
|  | 1692 |  | 
|  | 1693 | PyDoc_STRVAR(BZ2Comp_flush__doc__, | 
|  | 1694 | "flush() -> string\n\ | 
|  | 1695 | \n\ | 
|  | 1696 | Finish the compression process and return what is left in internal buffers.\n\ | 
|  | 1697 | You must not use the compressor object after calling this method.\n\ | 
|  | 1698 | "); | 
|  | 1699 |  | 
|  | 1700 | static PyObject * | 
|  | 1701 | BZ2Comp_flush(BZ2CompObject *self) | 
|  | 1702 | { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1703 | size_t output_size = 0; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1704 | PyObject *ret = NULL; | 
|  | 1705 | bz_stream *bzs = &self->bzs; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1706 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1707 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1708 | ACQUIRE_LOCK(self); | 
|  | 1709 | if (!self->running) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1710 | PyErr_SetString(PyExc_ValueError, "object was already flushed"); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1711 | goto error; | 
|  | 1712 | } | 
|  | 1713 | self->running = 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1714 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1715 | ret = PyString_FromStringAndSize(NULL, SMALLCHUNK); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1716 | if (!ret) | 
|  | 1717 | goto error; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1718 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1719 | bzs->next_out = BUF(ret); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1720 | bzs->avail_out = PyString_GET_SIZE(ret); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1721 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1722 | for (;;) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1723 | char *saved_next_out; | 
|  | 1724 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1725 | Py_BEGIN_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1726 | saved_next_out = bzs->next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1727 | bzerror = BZ2_bzCompress(bzs, BZ_FINISH); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1728 | output_size += bzs->next_out - saved_next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1729 | Py_END_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1730 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1731 | if (bzerror == BZ_STREAM_END) { | 
|  | 1732 | break; | 
|  | 1733 | } else if (bzerror != BZ_FINISH_OK) { | 
|  | 1734 | Util_CatchBZ2Error(bzerror); | 
|  | 1735 | goto error; | 
|  | 1736 | } | 
|  | 1737 | if (bzs->avail_out == 0) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1738 | size_t buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 1739 | if (buffer_left == 0) { | 
|  | 1740 | if (Util_GrowBuffer(&ret) < 0) | 
|  | 1741 | goto error; | 
|  | 1742 | bzs->next_out = BUF(ret) + output_size; | 
|  | 1743 | buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 1744 | } | 
|  | 1745 | bzs->avail_out = MIN(buffer_left, UINT_MAX); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1746 | } | 
|  | 1747 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1748 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1749 | if (output_size != PyString_GET_SIZE(ret)) | 
|  | 1750 | if (_PyString_Resize(&ret, output_size) < 0) | 
|  | 1751 | goto error; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1752 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1753 | RELEASE_LOCK(self); | 
|  | 1754 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1755 |  | 
|  | 1756 | error: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1757 | RELEASE_LOCK(self); | 
|  | 1758 | Py_XDECREF(ret); | 
|  | 1759 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1760 | } | 
|  | 1761 |  | 
|  | 1762 | static PyMethodDef BZ2Comp_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1763 | {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS, | 
|  | 1764 | BZ2Comp_compress__doc__}, | 
|  | 1765 | {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS, | 
|  | 1766 | BZ2Comp_flush__doc__}, | 
|  | 1767 | {NULL,              NULL}           /* sentinel */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1768 | }; | 
|  | 1769 |  | 
|  | 1770 |  | 
|  | 1771 | /* ===================================================================== */ | 
|  | 1772 | /* Slot definitions for BZ2Comp_Type. */ | 
|  | 1773 |  | 
|  | 1774 | static int | 
|  | 1775 | BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs) | 
|  | 1776 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1777 | int compresslevel = 9; | 
|  | 1778 | int bzerror; | 
|  | 1779 | static char *kwlist[] = {"compresslevel", 0}; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1780 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1781 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor", | 
|  | 1782 | kwlist, &compresslevel)) | 
|  | 1783 | return -1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1784 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1785 | if (compresslevel < 1 || compresslevel > 9) { | 
|  | 1786 | PyErr_SetString(PyExc_ValueError, | 
|  | 1787 | "compresslevel must be between 1 and 9"); | 
|  | 1788 | goto error; | 
|  | 1789 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1790 |  | 
|  | 1791 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1792 | self->lock = PyThread_allocate_lock(); | 
|  | 1793 | if (!self->lock) { | 
|  | 1794 | PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); | 
|  | 1795 | goto error; | 
|  | 1796 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1797 | #endif | 
|  | 1798 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1799 | memset(&self->bzs, 0, sizeof(bz_stream)); | 
|  | 1800 | bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); | 
|  | 1801 | if (bzerror != BZ_OK) { | 
|  | 1802 | Util_CatchBZ2Error(bzerror); | 
|  | 1803 | goto error; | 
|  | 1804 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1805 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1806 | self->running = 1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1807 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1808 | return 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1809 | error: | 
|  | 1810 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1811 | if (self->lock) { | 
|  | 1812 | PyThread_free_lock(self->lock); | 
|  | 1813 | self->lock = NULL; | 
|  | 1814 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1815 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1816 | return -1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1817 | } | 
|  | 1818 |  | 
|  | 1819 | static void | 
|  | 1820 | BZ2Comp_dealloc(BZ2CompObject *self) | 
|  | 1821 | { | 
|  | 1822 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1823 | if (self->lock) | 
|  | 1824 | PyThread_free_lock(self->lock); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1825 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1826 | BZ2_bzCompressEnd(&self->bzs); | 
|  | 1827 | Py_TYPE(self)->tp_free((PyObject *)self); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1828 | } | 
|  | 1829 |  | 
|  | 1830 |  | 
|  | 1831 | /* ===================================================================== */ | 
|  | 1832 | /* BZ2Comp_Type definition. */ | 
|  | 1833 |  | 
|  | 1834 | PyDoc_STRVAR(BZ2Comp__doc__, | 
|  | 1835 | "BZ2Compressor([compresslevel=9]) -> compressor object\n\ | 
|  | 1836 | \n\ | 
|  | 1837 | Create a new compressor object. This object may be used to compress\n\ | 
|  | 1838 | data sequentially. If you want to compress data in one shot, use the\n\ | 
|  | 1839 | compress() function instead. The compresslevel parameter, if given,\n\ | 
|  | 1840 | must be a number between 1 and 9.\n\ | 
|  | 1841 | "); | 
|  | 1842 |  | 
| Gustavo Niemeyer | 49ea7be | 2002-11-08 14:31:49 +0000 | [diff] [blame] | 1843 | static PyTypeObject BZ2Comp_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1844 | PyVarObject_HEAD_INIT(NULL, 0) | 
|  | 1845 | "bz2.BZ2Compressor",        /*tp_name*/ | 
|  | 1846 | sizeof(BZ2CompObject),      /*tp_basicsize*/ | 
|  | 1847 | 0,                          /*tp_itemsize*/ | 
|  | 1848 | (destructor)BZ2Comp_dealloc, /*tp_dealloc*/ | 
|  | 1849 | 0,                          /*tp_print*/ | 
|  | 1850 | 0,                          /*tp_getattr*/ | 
|  | 1851 | 0,                          /*tp_setattr*/ | 
|  | 1852 | 0,                          /*tp_compare*/ | 
|  | 1853 | 0,                          /*tp_repr*/ | 
|  | 1854 | 0,                          /*tp_as_number*/ | 
|  | 1855 | 0,                          /*tp_as_sequence*/ | 
|  | 1856 | 0,                          /*tp_as_mapping*/ | 
|  | 1857 | 0,                          /*tp_hash*/ | 
|  | 1858 | 0,                      /*tp_call*/ | 
|  | 1859 | 0,                      /*tp_str*/ | 
|  | 1860 | PyObject_GenericGetAttr,/*tp_getattro*/ | 
|  | 1861 | PyObject_GenericSetAttr,/*tp_setattro*/ | 
|  | 1862 | 0,                      /*tp_as_buffer*/ | 
|  | 1863 | Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ | 
|  | 1864 | BZ2Comp__doc__,         /*tp_doc*/ | 
|  | 1865 | 0,                      /*tp_traverse*/ | 
|  | 1866 | 0,                      /*tp_clear*/ | 
|  | 1867 | 0,                      /*tp_richcompare*/ | 
|  | 1868 | 0,                      /*tp_weaklistoffset*/ | 
|  | 1869 | 0,                      /*tp_iter*/ | 
|  | 1870 | 0,                      /*tp_iternext*/ | 
|  | 1871 | BZ2Comp_methods,        /*tp_methods*/ | 
|  | 1872 | 0,                      /*tp_members*/ | 
|  | 1873 | 0,                      /*tp_getset*/ | 
|  | 1874 | 0,                      /*tp_base*/ | 
|  | 1875 | 0,                      /*tp_dict*/ | 
|  | 1876 | 0,                      /*tp_descr_get*/ | 
|  | 1877 | 0,                      /*tp_descr_set*/ | 
|  | 1878 | 0,                      /*tp_dictoffset*/ | 
|  | 1879 | (initproc)BZ2Comp_init, /*tp_init*/ | 
|  | 1880 | PyType_GenericAlloc,    /*tp_alloc*/ | 
|  | 1881 | PyType_GenericNew,      /*tp_new*/ | 
|  | 1882 | _PyObject_Del,          /*tp_free*/ | 
|  | 1883 | 0,                      /*tp_is_gc*/ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1884 | }; | 
|  | 1885 |  | 
|  | 1886 |  | 
|  | 1887 | /* ===================================================================== */ | 
|  | 1888 | /* Members of BZ2Decomp. */ | 
|  | 1889 |  | 
| Gustavo Niemeyer | a33d0aa | 2003-02-11 18:46:20 +0000 | [diff] [blame] | 1890 | #undef OFF | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1891 | #define OFF(x) offsetof(BZ2DecompObject, x) | 
|  | 1892 |  | 
|  | 1893 | static PyMemberDef BZ2Decomp_members[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1894 | {"unused_data", T_OBJECT, OFF(unused_data), RO}, | 
|  | 1895 | {NULL}      /* Sentinel */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1896 | }; | 
|  | 1897 |  | 
|  | 1898 |  | 
|  | 1899 | /* ===================================================================== */ | 
|  | 1900 | /* Methods of BZ2Decomp. */ | 
|  | 1901 |  | 
|  | 1902 | PyDoc_STRVAR(BZ2Decomp_decompress__doc__, | 
|  | 1903 | "decompress(data) -> string\n\ | 
|  | 1904 | \n\ | 
|  | 1905 | Provide more data to the decompressor object. It will return chunks\n\ | 
|  | 1906 | of decompressed data whenever possible. If you try to decompress data\n\ | 
|  | 1907 | after the end of stream is found, EOFError will be raised. If any data\n\ | 
|  | 1908 | was found after the end of stream, it'll be ignored and saved in\n\ | 
|  | 1909 | unused_data attribute.\n\ | 
|  | 1910 | "); | 
|  | 1911 |  | 
|  | 1912 | static PyObject * | 
|  | 1913 | BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args) | 
|  | 1914 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1915 | Py_buffer pdata; | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1916 | size_t input_left; | 
|  | 1917 | size_t output_size = 0; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1918 | PyObject *ret = NULL; | 
|  | 1919 | bz_stream *bzs = &self->bzs; | 
|  | 1920 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1921 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1922 | if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) | 
|  | 1923 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1924 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1925 | ACQUIRE_LOCK(self); | 
|  | 1926 | if (!self->running) { | 
|  | 1927 | PyErr_SetString(PyExc_EOFError, "end of stream was " | 
|  | 1928 | "already found"); | 
|  | 1929 | goto error; | 
|  | 1930 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1931 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1932 | ret = PyString_FromStringAndSize(NULL, SMALLCHUNK); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1933 | if (!ret) | 
|  | 1934 | goto error; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1935 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1936 | bzs->next_in = pdata.buf; | 
|  | 1937 | bzs->avail_in = MIN(pdata.len, UINT_MAX); | 
|  | 1938 | input_left = pdata.len - bzs->avail_in; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1939 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1940 | bzs->next_out = BUF(ret); | 
|  | 1941 | bzs->avail_out = PyString_GET_SIZE(ret); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1942 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1943 | for (;;) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1944 | char *saved_next_out; | 
|  | 1945 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1946 | Py_BEGIN_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1947 | saved_next_out = bzs->next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1948 | bzerror = BZ2_bzDecompress(bzs); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1949 | output_size += bzs->next_out - saved_next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1950 | Py_END_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1951 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1952 | if (bzerror == BZ_STREAM_END) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1953 | self->running = 0; | 
|  | 1954 | input_left += bzs->avail_in; | 
|  | 1955 | if (input_left != 0) { | 
| Serhiy Storchaka | 763a61c | 2016-04-10 18:05:12 +0300 | [diff] [blame^] | 1956 | Py_SETREF(self->unused_data, | 
| Serhiy Storchaka | 5951f23 | 2015-12-24 10:35:35 +0200 | [diff] [blame] | 1957 | PyString_FromStringAndSize(bzs->next_in, input_left)); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1958 | if (self->unused_data == NULL) | 
|  | 1959 | goto error; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1960 | } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1961 | break; | 
|  | 1962 | } | 
|  | 1963 | if (bzerror != BZ_OK) { | 
|  | 1964 | Util_CatchBZ2Error(bzerror); | 
|  | 1965 | goto error; | 
|  | 1966 | } | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1967 | if (bzs->avail_in == 0) { | 
|  | 1968 | if (input_left == 0) | 
|  | 1969 | break; /* no more input data */ | 
|  | 1970 | bzs->avail_in = MIN(input_left, UINT_MAX); | 
|  | 1971 | input_left -= bzs->avail_in; | 
|  | 1972 | } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1973 | if (bzs->avail_out == 0) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1974 | size_t buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 1975 | if (buffer_left == 0) { | 
|  | 1976 | if (Util_GrowBuffer(&ret) < 0) { | 
|  | 1977 | BZ2_bzDecompressEnd(bzs); | 
|  | 1978 | goto error; | 
|  | 1979 | } | 
|  | 1980 | bzs->next_out = BUF(ret) + output_size; | 
|  | 1981 | buffer_left = PyString_GET_SIZE(ret) - output_size; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1982 | } | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1983 | bzs->avail_out = MIN(buffer_left, UINT_MAX); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1984 | } | 
|  | 1985 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1986 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 1987 | if (output_size != PyString_GET_SIZE(ret)) | 
|  | 1988 | if (_PyString_Resize(&ret, output_size) < 0) | 
|  | 1989 | goto error; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1990 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1991 | RELEASE_LOCK(self); | 
|  | 1992 | PyBuffer_Release(&pdata); | 
|  | 1993 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 1994 |  | 
|  | 1995 | error: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1996 | RELEASE_LOCK(self); | 
|  | 1997 | PyBuffer_Release(&pdata); | 
|  | 1998 | Py_XDECREF(ret); | 
|  | 1999 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2000 | } | 
|  | 2001 |  | 
|  | 2002 | static PyMethodDef BZ2Decomp_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2003 | {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__}, | 
|  | 2004 | {NULL,              NULL}           /* sentinel */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2005 | }; | 
|  | 2006 |  | 
|  | 2007 |  | 
|  | 2008 | /* ===================================================================== */ | 
|  | 2009 | /* Slot definitions for BZ2Decomp_Type. */ | 
|  | 2010 |  | 
|  | 2011 | static int | 
|  | 2012 | BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs) | 
|  | 2013 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2014 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2015 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2016 | if (!PyArg_ParseTuple(args, ":BZ2Decompressor")) | 
|  | 2017 | return -1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2018 |  | 
|  | 2019 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2020 | self->lock = PyThread_allocate_lock(); | 
|  | 2021 | if (!self->lock) { | 
|  | 2022 | PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); | 
|  | 2023 | goto error; | 
|  | 2024 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2025 | #endif | 
|  | 2026 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2027 | self->unused_data = PyString_FromString(""); | 
|  | 2028 | if (!self->unused_data) | 
|  | 2029 | goto error; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2030 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2031 | memset(&self->bzs, 0, sizeof(bz_stream)); | 
|  | 2032 | bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); | 
|  | 2033 | if (bzerror != BZ_OK) { | 
|  | 2034 | Util_CatchBZ2Error(bzerror); | 
|  | 2035 | goto error; | 
|  | 2036 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2037 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2038 | self->running = 1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2039 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2040 | return 0; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2041 |  | 
|  | 2042 | error: | 
|  | 2043 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2044 | if (self->lock) { | 
|  | 2045 | PyThread_free_lock(self->lock); | 
|  | 2046 | self->lock = NULL; | 
|  | 2047 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2048 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2049 | Py_CLEAR(self->unused_data); | 
|  | 2050 | return -1; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2051 | } | 
|  | 2052 |  | 
|  | 2053 | static void | 
|  | 2054 | BZ2Decomp_dealloc(BZ2DecompObject *self) | 
|  | 2055 | { | 
|  | 2056 | #ifdef WITH_THREAD | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2057 | if (self->lock) | 
|  | 2058 | PyThread_free_lock(self->lock); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2059 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2060 | Py_XDECREF(self->unused_data); | 
|  | 2061 | BZ2_bzDecompressEnd(&self->bzs); | 
|  | 2062 | Py_TYPE(self)->tp_free((PyObject *)self); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2063 | } | 
|  | 2064 |  | 
|  | 2065 |  | 
|  | 2066 | /* ===================================================================== */ | 
|  | 2067 | /* BZ2Decomp_Type definition. */ | 
|  | 2068 |  | 
|  | 2069 | PyDoc_STRVAR(BZ2Decomp__doc__, | 
|  | 2070 | "BZ2Decompressor() -> decompressor object\n\ | 
|  | 2071 | \n\ | 
|  | 2072 | Create a new decompressor object. This object may be used to decompress\n\ | 
|  | 2073 | data sequentially. If you want to decompress data in one shot, use the\n\ | 
|  | 2074 | decompress() function instead.\n\ | 
|  | 2075 | "); | 
|  | 2076 |  | 
| Gustavo Niemeyer | 49ea7be | 2002-11-08 14:31:49 +0000 | [diff] [blame] | 2077 | static PyTypeObject BZ2Decomp_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2078 | PyVarObject_HEAD_INIT(NULL, 0) | 
|  | 2079 | "bz2.BZ2Decompressor",      /*tp_name*/ | 
|  | 2080 | sizeof(BZ2DecompObject), /*tp_basicsize*/ | 
|  | 2081 | 0,                          /*tp_itemsize*/ | 
|  | 2082 | (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/ | 
|  | 2083 | 0,                          /*tp_print*/ | 
|  | 2084 | 0,                          /*tp_getattr*/ | 
|  | 2085 | 0,                          /*tp_setattr*/ | 
|  | 2086 | 0,                          /*tp_compare*/ | 
|  | 2087 | 0,                          /*tp_repr*/ | 
|  | 2088 | 0,                          /*tp_as_number*/ | 
|  | 2089 | 0,                          /*tp_as_sequence*/ | 
|  | 2090 | 0,                          /*tp_as_mapping*/ | 
|  | 2091 | 0,                          /*tp_hash*/ | 
|  | 2092 | 0,                      /*tp_call*/ | 
|  | 2093 | 0,                      /*tp_str*/ | 
|  | 2094 | PyObject_GenericGetAttr,/*tp_getattro*/ | 
|  | 2095 | PyObject_GenericSetAttr,/*tp_setattro*/ | 
|  | 2096 | 0,                      /*tp_as_buffer*/ | 
|  | 2097 | Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ | 
|  | 2098 | BZ2Decomp__doc__,       /*tp_doc*/ | 
|  | 2099 | 0,                      /*tp_traverse*/ | 
|  | 2100 | 0,                      /*tp_clear*/ | 
|  | 2101 | 0,                      /*tp_richcompare*/ | 
|  | 2102 | 0,                      /*tp_weaklistoffset*/ | 
|  | 2103 | 0,                      /*tp_iter*/ | 
|  | 2104 | 0,                      /*tp_iternext*/ | 
|  | 2105 | BZ2Decomp_methods,      /*tp_methods*/ | 
|  | 2106 | BZ2Decomp_members,      /*tp_members*/ | 
|  | 2107 | 0,                      /*tp_getset*/ | 
|  | 2108 | 0,                      /*tp_base*/ | 
|  | 2109 | 0,                      /*tp_dict*/ | 
|  | 2110 | 0,                      /*tp_descr_get*/ | 
|  | 2111 | 0,                      /*tp_descr_set*/ | 
|  | 2112 | 0,                      /*tp_dictoffset*/ | 
|  | 2113 | (initproc)BZ2Decomp_init, /*tp_init*/ | 
|  | 2114 | PyType_GenericAlloc,    /*tp_alloc*/ | 
|  | 2115 | PyType_GenericNew,      /*tp_new*/ | 
|  | 2116 | _PyObject_Del,          /*tp_free*/ | 
|  | 2117 | 0,                      /*tp_is_gc*/ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2118 | }; | 
|  | 2119 |  | 
|  | 2120 |  | 
|  | 2121 | /* ===================================================================== */ | 
|  | 2122 | /* Module functions. */ | 
|  | 2123 |  | 
|  | 2124 | PyDoc_STRVAR(bz2_compress__doc__, | 
|  | 2125 | "compress(data [, compresslevel=9]) -> string\n\ | 
|  | 2126 | \n\ | 
|  | 2127 | Compress data in one shot. If you want to compress data sequentially,\n\ | 
|  | 2128 | use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\ | 
|  | 2129 | given, must be a number between 1 and 9.\n\ | 
|  | 2130 | "); | 
|  | 2131 |  | 
|  | 2132 | static PyObject * | 
|  | 2133 | bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs) | 
|  | 2134 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2135 | int compresslevel=9; | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2136 | int action; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2137 | Py_buffer pdata; | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2138 | size_t input_left; | 
|  | 2139 | size_t output_size = 0; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2140 | PyObject *ret = NULL; | 
|  | 2141 | bz_stream _bzs; | 
|  | 2142 | bz_stream *bzs = &_bzs; | 
|  | 2143 | int bzerror; | 
|  | 2144 | static char *kwlist[] = {"data", "compresslevel", 0}; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2145 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2146 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", | 
|  | 2147 | kwlist, &pdata, | 
|  | 2148 | &compresslevel)) | 
|  | 2149 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2150 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2151 | if (compresslevel < 1 || compresslevel > 9) { | 
|  | 2152 | PyErr_SetString(PyExc_ValueError, | 
|  | 2153 | "compresslevel must be between 1 and 9"); | 
|  | 2154 | PyBuffer_Release(&pdata); | 
|  | 2155 | return NULL; | 
|  | 2156 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2157 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2158 | ret = PyString_FromStringAndSize(NULL, SMALLCHUNK); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2159 | if (!ret) { | 
|  | 2160 | PyBuffer_Release(&pdata); | 
|  | 2161 | return NULL; | 
|  | 2162 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2163 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2164 | memset(bzs, 0, sizeof(bz_stream)); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2165 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2166 | bzs->next_in = pdata.buf; | 
|  | 2167 | bzs->avail_in = MIN(pdata.len, UINT_MAX); | 
|  | 2168 | input_left = pdata.len - bzs->avail_in; | 
|  | 2169 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2170 | bzs->next_out = BUF(ret); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2171 | bzs->avail_out = PyString_GET_SIZE(ret); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2172 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2173 | bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0); | 
|  | 2174 | if (bzerror != BZ_OK) { | 
|  | 2175 | Util_CatchBZ2Error(bzerror); | 
|  | 2176 | PyBuffer_Release(&pdata); | 
|  | 2177 | Py_DECREF(ret); | 
|  | 2178 | return NULL; | 
|  | 2179 | } | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 2180 |  | 
| Nadeem Vawda | 895cc22 | 2013-01-02 22:59:51 +0100 | [diff] [blame] | 2181 | action = input_left > 0 ? BZ_RUN : BZ_FINISH; | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2182 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2183 | for (;;) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2184 | char *saved_next_out; | 
|  | 2185 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2186 | Py_BEGIN_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2187 | saved_next_out = bzs->next_out; | 
|  | 2188 | bzerror = BZ2_bzCompress(bzs, action); | 
|  | 2189 | output_size += bzs->next_out - saved_next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2190 | Py_END_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2191 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2192 | if (bzerror == BZ_STREAM_END) { | 
|  | 2193 | break; | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2194 | } else if (bzerror != BZ_RUN_OK && bzerror != BZ_FINISH_OK) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2195 | BZ2_bzCompressEnd(bzs); | 
|  | 2196 | Util_CatchBZ2Error(bzerror); | 
|  | 2197 | PyBuffer_Release(&pdata); | 
|  | 2198 | Py_DECREF(ret); | 
|  | 2199 | return NULL; | 
|  | 2200 | } | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2201 | if (action == BZ_RUN && bzs->avail_in == 0) { | 
|  | 2202 | if (input_left == 0) { | 
|  | 2203 | action = BZ_FINISH; | 
|  | 2204 | } else { | 
|  | 2205 | bzs->avail_in = MIN(input_left, UINT_MAX); | 
|  | 2206 | input_left -= bzs->avail_in; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2207 | } | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2208 | } | 
|  | 2209 | if (bzs->avail_out == 0) { | 
|  | 2210 | size_t buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 2211 | if (buffer_left == 0) { | 
|  | 2212 | if (Util_GrowBuffer(&ret) < 0) { | 
|  | 2213 | BZ2_bzCompressEnd(bzs); | 
|  | 2214 | PyBuffer_Release(&pdata); | 
|  | 2215 | return NULL; | 
|  | 2216 | } | 
|  | 2217 | bzs->next_out = BUF(ret) + output_size; | 
|  | 2218 | buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 2219 | } | 
|  | 2220 | bzs->avail_out = MIN(buffer_left, UINT_MAX); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2221 | } | 
|  | 2222 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2223 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2224 | if (output_size != PyString_GET_SIZE(ret)) | 
|  | 2225 | _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2226 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2227 | BZ2_bzCompressEnd(bzs); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2228 | PyBuffer_Release(&pdata); | 
|  | 2229 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2230 | } | 
|  | 2231 |  | 
|  | 2232 | PyDoc_STRVAR(bz2_decompress__doc__, | 
|  | 2233 | "decompress(data) -> decompressed data\n\ | 
|  | 2234 | \n\ | 
|  | 2235 | Decompress data in one shot. If you want to decompress data sequentially,\n\ | 
|  | 2236 | use an instance of BZ2Decompressor instead.\n\ | 
|  | 2237 | "); | 
|  | 2238 |  | 
|  | 2239 | static PyObject * | 
|  | 2240 | bz2_decompress(PyObject *self, PyObject *args) | 
|  | 2241 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2242 | Py_buffer pdata; | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2243 | size_t input_left; | 
|  | 2244 | size_t output_size = 0; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2245 | PyObject *ret; | 
|  | 2246 | bz_stream _bzs; | 
|  | 2247 | bz_stream *bzs = &_bzs; | 
|  | 2248 | int bzerror; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2249 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2250 | if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) | 
|  | 2251 | return NULL; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2252 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2253 | if (pdata.len == 0) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2254 | PyBuffer_Release(&pdata); | 
|  | 2255 | return PyString_FromString(""); | 
|  | 2256 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2257 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2258 | ret = PyString_FromStringAndSize(NULL, SMALLCHUNK); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2259 | if (!ret) { | 
|  | 2260 | PyBuffer_Release(&pdata); | 
|  | 2261 | return NULL; | 
|  | 2262 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2263 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2264 | memset(bzs, 0, sizeof(bz_stream)); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2265 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2266 | bzs->next_in = pdata.buf; | 
|  | 2267 | bzs->avail_in = MIN(pdata.len, UINT_MAX); | 
|  | 2268 | input_left = pdata.len - bzs->avail_in; | 
|  | 2269 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2270 | bzs->next_out = BUF(ret); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2271 | bzs->avail_out = PyString_GET_SIZE(ret); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2272 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2273 | bzerror = BZ2_bzDecompressInit(bzs, 0, 0); | 
|  | 2274 | if (bzerror != BZ_OK) { | 
|  | 2275 | Util_CatchBZ2Error(bzerror); | 
|  | 2276 | Py_DECREF(ret); | 
|  | 2277 | PyBuffer_Release(&pdata); | 
|  | 2278 | return NULL; | 
|  | 2279 | } | 
| Tim Peters | e322809 | 2002-11-09 04:21:44 +0000 | [diff] [blame] | 2280 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2281 | for (;;) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2282 | char *saved_next_out; | 
|  | 2283 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2284 | Py_BEGIN_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2285 | saved_next_out = bzs->next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2286 | bzerror = BZ2_bzDecompress(bzs); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2287 | output_size += bzs->next_out - saved_next_out; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2288 | Py_END_ALLOW_THREADS | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2289 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2290 | if (bzerror == BZ_STREAM_END) { | 
|  | 2291 | break; | 
|  | 2292 | } else if (bzerror != BZ_OK) { | 
|  | 2293 | BZ2_bzDecompressEnd(bzs); | 
|  | 2294 | Util_CatchBZ2Error(bzerror); | 
|  | 2295 | PyBuffer_Release(&pdata); | 
|  | 2296 | Py_DECREF(ret); | 
|  | 2297 | return NULL; | 
|  | 2298 | } | 
|  | 2299 | if (bzs->avail_in == 0) { | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2300 | if (input_left == 0) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2301 | BZ2_bzDecompressEnd(bzs); | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2302 | PyErr_SetString(PyExc_ValueError, | 
|  | 2303 | "couldn't find end of stream"); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2304 | PyBuffer_Release(&pdata); | 
|  | 2305 | Py_DECREF(ret); | 
|  | 2306 | return NULL; | 
|  | 2307 | } | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2308 | bzs->avail_in = MIN(input_left, UINT_MAX); | 
|  | 2309 | input_left -= bzs->avail_in; | 
|  | 2310 | } | 
|  | 2311 | if (bzs->avail_out == 0) { | 
|  | 2312 | size_t buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 2313 | if (buffer_left == 0) { | 
|  | 2314 | if (Util_GrowBuffer(&ret) < 0) { | 
|  | 2315 | BZ2_bzDecompressEnd(bzs); | 
|  | 2316 | PyBuffer_Release(&pdata); | 
|  | 2317 | return NULL; | 
|  | 2318 | } | 
|  | 2319 | bzs->next_out = BUF(ret) + output_size; | 
|  | 2320 | buffer_left = PyString_GET_SIZE(ret) - output_size; | 
|  | 2321 | } | 
|  | 2322 | bzs->avail_out = MIN(buffer_left, UINT_MAX); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2323 | } | 
|  | 2324 | } | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2325 |  | 
| Nadeem Vawda | 45dba1d | 2012-10-21 23:09:08 +0200 | [diff] [blame] | 2326 | if (output_size != PyString_GET_SIZE(ret)) | 
|  | 2327 | _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */ | 
|  | 2328 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2329 | BZ2_bzDecompressEnd(bzs); | 
|  | 2330 | PyBuffer_Release(&pdata); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2331 | return ret; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2332 | } | 
|  | 2333 |  | 
|  | 2334 | static PyMethodDef bz2_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2335 | {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS, | 
|  | 2336 | bz2_compress__doc__}, | 
|  | 2337 | {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS, | 
|  | 2338 | bz2_decompress__doc__}, | 
|  | 2339 | {NULL,              NULL}           /* sentinel */ | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2340 | }; | 
|  | 2341 |  | 
|  | 2342 | /* ===================================================================== */ | 
|  | 2343 | /* Initialization function. */ | 
|  | 2344 |  | 
|  | 2345 | PyDoc_STRVAR(bz2__doc__, | 
|  | 2346 | "The python bz2 module provides a comprehensive interface for\n\ | 
|  | 2347 | the bz2 compression library. It implements a complete file\n\ | 
|  | 2348 | interface, one shot (de)compression functions, and types for\n\ | 
|  | 2349 | sequential (de)compression.\n\ | 
|  | 2350 | "); | 
|  | 2351 |  | 
| Neal Norwitz | 21d896c | 2003-07-01 20:15:21 +0000 | [diff] [blame] | 2352 | PyMODINIT_FUNC | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2353 | initbz2(void) | 
|  | 2354 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2355 | PyObject *m; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2356 |  | 
| Antoine Pitrou | 2c7d685 | 2010-09-23 19:55:24 +0000 | [diff] [blame] | 2357 | if (PyType_Ready(&BZ2File_Type) < 0) | 
|  | 2358 | return; | 
|  | 2359 | if (PyType_Ready(&BZ2Comp_Type) < 0) | 
|  | 2360 | return; | 
|  | 2361 | if (PyType_Ready(&BZ2Decomp_Type) < 0) | 
|  | 2362 | return; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2363 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2364 | m = Py_InitModule3("bz2", bz2_methods, bz2__doc__); | 
|  | 2365 | if (m == NULL) | 
|  | 2366 | return; | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2367 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2368 | PyModule_AddObject(m, "__author__", PyString_FromString(__author__)); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2369 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2370 | Py_INCREF(&BZ2File_Type); | 
|  | 2371 | PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2372 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2373 | Py_INCREF(&BZ2Comp_Type); | 
|  | 2374 | PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2375 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2376 | Py_INCREF(&BZ2Decomp_Type); | 
|  | 2377 | PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type); | 
| Gustavo Niemeyer | f8ca836 | 2002-11-05 16:50:05 +0000 | [diff] [blame] | 2378 | } |