Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 1 | """New I/O library. |
| 2 | |
Guido van Rossum | 17e43e5 | 2007-02-27 15:45:13 +0000 | [diff] [blame] | 3 | This is an early prototype; eventually some of this will be |
| 4 | reimplemented in C and the rest may be turned into a package. |
| 5 | |
Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 6 | See PEP XXX; for now: http://docs.google.com/Doc?id=dfksfvqd_1cn5g5m |
| 7 | """ |
| 8 | |
Guido van Rossum | 68bbcd2 | 2007-02-27 17:19:33 +0000 | [diff] [blame] | 9 | __author__ = ("Guido van Rossum <guido@python.org>, " |
| 10 | "Mike Verdone <mike.verdone@gmail.com>") |
Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 11 | |
Guido van Rossum | 68bbcd2 | 2007-02-27 17:19:33 +0000 | [diff] [blame] | 12 | __all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO", |
| 13 | "BufferedReader", "BufferedWriter", "BufferedRWPair", "EOF"] |
Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 14 | |
| 15 | import os |
| 16 | |
# Default size, in bytes, of the buffers used by the buffered stream classes.
DEFAULT_BUFFER_SIZE = 8192  # 8 KiB

# Value returned by read() at end of file: the empty bytes object.
EOF = bytes()
| 19 | |
def open(filename, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      filename: string giving the name of the file to be opened
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      ValueError: if the mode string is malformed or self-contradictory,
        if an encoding is combined with binary mode, if buffering is
        negative, or if buffering is zero in text mode.  All of these
        are reported before the file is opened, so no descriptor is
        leaked on invalid arguments.
    """
    assert isinstance(filename, str)
    assert isinstance(mode, str)
    assert buffering is None or isinstance(buffering, int)
    assert encoding is None or isinstance(encoding, str)
    modes = set(mode)
    # The length comparison rejects repeated mode characters such as "rr".
    if modes - set("arwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Validate an explicit buffering value *before* opening the file so
    # that a bad argument cannot leave an open descriptor behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(filename,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No block-size hint available; keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    # XXX What about newline conventions?
    textio = TextIOWrapper(buffer, encoding)
    return textio
| 101 | |
| 102 | |
class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support reading only need to implement
    readinto() as a primitive operation.
    """

    def read(self, n):
        """Read up to n bytes and return them as a bytes object.

        A mutable scratch buffer of n bytes is handed to readinto().
        Only the number of bytes readinto() reports is returned, so a
        short read is never padded with zero bytes.  If readinto()
        returns None, None is returned.
        """
        scratch = bytearray(n.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        return bytes(scratch[:count])

    def readinto(self, b):
        """Read into the writable buffer b; unsupported by default."""
        raise IOError(".readinto() not supported")

    def write(self, b):
        """Write the bytes in b; unsupported by default."""
        raise IOError(".write() not supported")

    def seek(self, pos, whence=0):
        """Change the stream position; unsupported by default."""
        raise IOError(".seek() not supported")

    def tell(self):
        """Return the stream position; unsupported by default."""
        raise IOError(".tell() not supported")

    def truncate(self, pos=None):
        """Resize the stream; unsupported by default."""
        raise IOError(".truncate() not supported")

    def close(self):
        """Close the stream; the base implementation does nothing."""
        pass

    def seekable(self):
        """Return False: the base stream cannot seek."""
        return False

    def readable(self):
        """Return False: the base stream cannot be read."""
        return False

    def writable(self):
        """Return False: the base stream cannot be written."""
        return False

    def __enter__(self):
        """Enter a with-block; returns the stream itself."""
        return self

    def __exit__(self, *args):
        """Leave a with-block by closing the stream."""
        self.close()

    def fileno(self):
        """Return the underlying descriptor; unsupported by default."""
        raise IOError(".fileno() not supported")
| 159 | |
| 160 | |
class FileIO(RawIOBase):

    """Raw I/O implementation for OS files.

    The file is opened with os.open() and every operation maps onto a
    single os-level call on the resulting file descriptor.
    """

    # Mapping from the supported mode strings to os.open() flags.
    _MODE_FLAGS = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "r+": os.O_RDWR,
        "w+": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
        "a+": os.O_RDWR | os.O_CREAT | os.O_APPEND,
    }

    def __init__(self, filename, mode):
        """Open filename with one of "r", "w", "a", "r+", "w+", "a+".

        Raises ValueError for any other mode string and whatever
        os.open() raises if the file cannot be opened.
        """
        flags = self._MODE_FLAGS.get(mode)
        if flags is None:  # O_RDONLY may be 0, so compare against None
            raise ValueError("unsupported mode %r" % mode)
        # O_BINARY only exists on platforms that distinguish text files.
        flags |= getattr(os, "O_BINARY", 0)
        self._seekable = None  # computed lazily by seekable()
        self._closed = False
        self._mode = mode
        # Request rw-rw-rw- for newly created files instead of os.open()'s
        # default of 0o777; the process umask still applies.
        self._fd = os.open(filename, flags, 0o666)

    def read(self, n):
        """Read up to n bytes with a single os.read() call."""
        return os.read(self._fd, n.__index__())

    def readinto(self, b):
        """Read up to len(b) bytes into b and return the number read.

        Only the leading bytes of b are overwritten; b is not resized.
        """
        # XXX We really should have os.readinto()
        data = os.read(self._fd, len(b))
        count = len(data)
        b[:count] = data
        return count

    def write(self, b):
        """Write b with one os.write() call; return the bytes written."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        """Move the file position and return the new absolute offset."""
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current file position."""
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Truncate the file to pos bytes (default: current position).

        Returns the size the file was truncated to.
        """
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)
        return pos

    def close(self):
        """Close the descriptor; further calls are no-ops.

        Closing an fd twice could close an unrelated file that has
        since reused the same descriptor number.
        """
        if not self._closed:
            self._closed = True
            os.close(self._fd)

    def readable(self):
        """Return True if the mode allows reading."""
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        """Return True if the mode allows writing."""
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Return True if lseek() works on the descriptor (cached)."""
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        """Return the underlying OS file descriptor."""
        return self._fd
| 223 | |
Guido van Rossum | 68bbcd2 | 2007-02-27 17:19:33 +0000 | [diff] [blame] | 224 | |
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: reads go through recv_into(), writes through
    send(), and close()/fileno() are forwarded to the socket.
    """

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        self._sock, self._mode = sock, mode
        can_read = "r" in mode
        can_write = "w" in mode
        self._readable = can_read
        self._writable = can_write
        self._seekable = False

    def readinto(self, b):
        """Receive data into b; returns what recv_into() returns."""
        sock = self._sock
        return sock.recv_into(b)

    def write(self, b):
        """Send b; returns what send() returns."""
        sock = self._sock
        return sock.send(b)

    def close(self):
        """Close the wrapped socket."""
        sock = self._sock
        sock.close()

    def readable(self):
        """Return True if the mode string contains "r"."""
        mode = self._mode
        return "r" in mode

    def writable(self):
        """Return True if the mode string contains "w"."""
        mode = self._mode
        return "w" in mode

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        sock = self._sock
        return sock.fileno()
Neal Norwitz | 8b41c3d | 2007-02-27 06:26:14 +0000 | [diff] [blame] | 256 | |
Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 257 | |
class BufferedIOBase(RawIOBase):

    """Common base class for the buffered binary streams.

    It adds nothing to RawIOBase yet; it exists so that buffered
    streams share an ancestor distinct from the raw streams.
    """
| 261 | |
| 262 | |
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using a bytes buffer, like StringIO.

    The contents live in a bytearray so they can be overwritten,
    extended and truncated in place.
    """

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes.

        The position always starts at 0.
        """
        self._buffer = bytearray()
        self._pos = 0
        if inital_bytes is not None:
            self._buffer += inital_bytes

    def getvalue(self):
        """Return a bytes copy of the whole buffer."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        With n None or negative, everything up to the end is returned.
        Returns b"" when the position is at or past the end; in that
        case the position is left where it is.
        """
        if n is None or n < 0:
            n = len(self._buffer)
        end = min(len(self._buffer), self._pos + n)
        data = bytes(self._buffer[self._pos:end])
        self._pos += len(data)
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read.

        Only the leading bytes of b are overwritten; b is not resized.
        """
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def write(self, b):
        """Write b at the current position; return len(b).

        Existing bytes are overwritten and the buffer grows as needed.
        If the position is past the end, the gap is filled with zero
        bytes so the data lands at the position tell() reported.
        """
        n = len(b)
        if n == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            self._buffer.extend(bytes(gap))
        self._buffer[start:start + n] = b
        self._pos = start + n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end); resulting positions below 0 are clamped to 0.  Any other
        whence raises IOError.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos on (default: current position).

        An explicit pos is clamped to 0 and also becomes the new
        position.  Returns the offset used for the cut.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp first so a negative pos cannot be read as an index
            # counted from the end of the buffer.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True."""
        return True

    def writable(self):
        """Return True."""
        return True

    def seekable(self):
        """Return True."""
        return True
Guido van Rossum | 68bbcd2 | 2007-02-27 17:19:33 +0000 | [diff] [blame] | 325 | |
| 326 | |
class BufferedReader(BufferedIOBase):

    """Buffered reader.

    Buffer for a readable sequential RawIO object.  Does not allow
    random access (seek, tell).
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """
        Create a new buffered reader using the given readable raw IO object.

        buffer_size is the minimum number of bytes requested from raw
        per underlying read() call.
        """
        assert raw.readable()
        self.raw = raw
        self.buffer_size = buffer_size
        self._read_buf = b''
        if hasattr(raw, 'fileno'):
            self.fileno = raw.fileno

    def read(self, n=None):
        """
        Read n bytes.  Returns exactly n bytes of data unless the underlying
        raw IO stream reaches EOF or the call would block in non-blocking
        mode.  If n is None or negative, read until EOF or until read()
        would block.

        When no data at all is available, returns EOF if the raw stream
        reported end of file, or None if it reported that it would block.
        """
        if n is not None and n < 0:
            n = None
        nodata_val = EOF
        chunk_size = max(self.buffer_size, 1)
        # Collect the pieces and join once instead of growing bytes with +=.
        chunks = [self._read_buf] if self._read_buf else []
        have = len(self._read_buf)
        while n is None or have < n:
            want = chunk_size if n is None else max(chunk_size, n - have)
            current = self.raw.read(want)
            if current is None or len(current) == 0:
                nodata_val = None if current is None else EOF
                break
            chunks.append(current)
            have += len(current)
        data = b''.join(chunks)
        if not data:
            self._read_buf = b''
            return nodata_val
        if n is None:
            # Everything was handed out; nothing may remain buffered or
            # it would be returned a second time by the next read().
            self._read_buf = b''
            return data
        self._read_buf = data[n:]
        return data[:n]

    def write(self, b):
        """Always raises IOError: a reader cannot be written to."""
        raise IOError(".write() unsupported")

    def readable(self):
        """Return True."""
        return True

    def flush(self):
        """Do nothing; a reader has no pending output."""
        # Flush is a no-op
        pass
| 373 | |
| 374 | |
class BufferedWriter(BufferedIOBase):

    """Buffered writer.

    Collects written data in memory and passes it to the raw stream
    once more than buffer_size bytes are pending, or when flush() is
    called.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a buffered writer on top of the writable stream raw."""
        assert raw.writable()
        self.raw = raw
        self.buffer_size = buffer_size
        self._write_buf_stack = []
        self._write_buf_size = 0
        if hasattr(raw, 'fileno'):
            self.fileno = raw.fileno

    def read(self, n=None):
        """Always raises IOError: a writer cannot be read from."""
        raise IOError(".read() not supported")

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        b must be bytes, bytearray or memoryview (TypeError otherwise).
        It is copied, so later changes to a mutable b do not alter the
        pending data.  A flush is triggered when the pending total
        exceeds buffer_size.
        """
        if not isinstance(b, (bytes, bytearray, memoryview)):
            raise TypeError("write() argument must be bytes-like, not %s"
                            % type(b).__name__)
        data = bytes(b)
        self._write_buf_stack.append(data)
        self._write_buf_size += len(data)
        if self._write_buf_size > self.buffer_size:
            self.flush()
        return len(data)

    def writable(self):
        """Return True."""
        return True

    def writeable(self):
        """Alternative spelling of writable(), kept for existing callers."""
        return self.writable()

    def flush(self):
        """Write all pending data to the raw stream.

        Calls raw.write() repeatedly until everything is accepted.
        Raises IOError if raw.write() reports no progress (0 or None)
        instead of spinning forever.  On any failure only the unwritten
        remainder stays buffered, so a later flush() does not write the
        same bytes twice.
        """
        pending = b''.join(self._write_buf_stack)
        written = 0
        try:
            while written < len(pending):
                count = self.raw.write(pending[written:])
                if not count:
                    raise IOError("raw stream accepted no data during flush")
                written += count
        finally:
            rest = pending[written:]
            self._write_buf_stack = [rest] if rest else []
            self._write_buf_size = len(rest)

    # XXX support flushing buffer on close, del
| 412 | |
| 413 | |
class BufferedRWPair(BufferedReader, BufferedWriter):

    """Buffered Read/Write Pair.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.  Every
    operation is forwarded to the matching member; the pair keeps no
    buffer of its own.
    """

    def __init__(self, bufferedReader, bufferedWriter):
        """Combine a readable and a writable buffered stream."""
        assert bufferedReader.readable()
        # Accept writers that spell the capability check either way,
        # preferring "writeable" when the writer defines it.
        writer_check = getattr(bufferedWriter, "writeable", None)
        if writer_check is None:
            writer_check = bufferedWriter.writable
        assert writer_check()
        self._writer_check = writer_check
        self.bufferedReader = bufferedReader
        self.bufferedWriter = bufferedWriter

    def read(self, n=None):
        """Read from the reader member."""
        return self.bufferedReader.read(n)

    def write(self, b):
        """Write to the writer member."""
        return self.bufferedWriter.write(b)

    def flush(self):
        """Flush the writer member."""
        return self.bufferedWriter.flush()

    def readable(self):
        """Return the reader member's readable() result."""
        return self.bufferedReader.readable()

    def writable(self):
        """Return the writer member's capability-check result."""
        return self._writer_check()

    def writeable(self):
        """Alternative spelling of writable(), kept for existing callers."""
        return self._writer_check()

    def seekable(self):
        """Return False: the pair is strictly sequential."""
        return False

    def close(self):
        """Flush pending writes, then close both members.

        Both members are closed even if the flush or the first close
        raises.
        """
        try:
            self.bufferedWriter.flush()
        finally:
            try:
                self.bufferedWriter.close()
            finally:
                self.bufferedReader.close()