Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 1 | # Copyright 2006 Google, Inc. All Rights Reserved. |
| 2 | # Licensed to PSF under a Contributor Agreement. |
| 3 | |
| 4 | """New I/O library. |
| 5 | |
Guido van Rossum | 17e43e5 | 2007-02-27 15:45:13 +0000 | [diff] [blame] | 6 | This is an early prototype; eventually some of this will be |
| 7 | reimplemented in C and the rest may be turned into a package. |
| 8 | |
Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 9 | See PEP XXX; for now: http://docs.google.com/Doc?id=dfksfvqd_1cn5g5m |
| 10 | """ |
| 11 | |
| 12 | __author__ = "Guido van Rossum <guido@python.org>" |
| 13 | |
| 14 | __all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO"] |
| 15 | |
| 16 | import os |
| 17 | |
def open(filename, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      filename: string giving the name of the file to be opened
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      ValueError: if the mode string is malformed or contradictory, if
        an encoding is combined with binary mode, if buffering is
        negative, or if buffering is zero for a non-binary stream.
    """
    assert isinstance(filename, str)
    assert isinstance(mode, str)
    assert buffering is None or isinstance(buffering, int)
    assert encoding is None or isinstance(encoding, str)
    modes = set(mode)
    # Reject unknown characters as well as repeated ones ("rr").
    if modes - set("arwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Validate an explicit buffering value *before* opening the file, so
    # that a bad argument does not leave an open file descriptor behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(filename, raw_mode)
    if buffering is None:
        buffering = 8*1024  # International standard buffer size
        # Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No usable block size information; keep the default.
            pass
        else:
            # Only consult bs when fstat() actually succeeded.
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode; text mode was rejected above.
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    # XXX What about newline conventions?
    textio = TextIOWrapper(buffer, encoding)
    return textio
| 99 | |
| 100 | |
class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support reading only need to implement
    readinto() as a primitive operation.
    """

    def read(self, n):
        """Read up to n bytes and return them as a bytes object.

        A mutable buffer of n bytes is handed to readinto(); only the
        part that readinto() reports as filled is returned, so a short
        read never yields trailing padding.
        """
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            # readinto() did not report a count; fall back to whatever
            # length the buffer has now.
            count = len(buf)
        return bytes(buf[:count])

    def readinto(self, b):
        """Read into the mutable buffer b; unsupported by default."""
        raise IOError(".readinto() not supported")

    def write(self, b):
        """Write the bytes b; unsupported by default."""
        raise IOError(".write() not supported")

    def seek(self, pos, whence=0):
        """Change the stream position; unsupported by default."""
        raise IOError(".seek() not supported")

    def tell(self):
        """Return the stream position; unsupported by default."""
        raise IOError(".tell() not supported")

    def truncate(self, pos=None):
        """Resize the stream; unsupported by default."""
        raise IOError(".truncate() not supported")

    def close(self):
        """Release underlying resources; a no-op by default."""
        pass

    def seekable(self):
        """Return whether seek()/tell() are supported (False here)."""
        return False

    def readable(self):
        """Return whether reading is supported (False here)."""
        return False

    def writable(self):
        """Return whether writing is supported (False here)."""
        return False

    def __enter__(self):
        """Context manager entry: return the stream itself."""
        return self

    def __exit__(self, *args):
        """Context manager exit: close the stream."""
        self.close()

    def fileno(self):
        """Return the OS-level file descriptor; unsupported by default."""
        raise IOError(".fileno() not supported")
| 157 | |
| 158 | |
class FileIO(RawIOBase):

    """Raw I/O implementation for OS files.

    Wraps a file descriptor obtained from os.open() and maps the raw
    stream operations directly onto the corresponding os-level calls.
    """

    # XXX More docs

    # Mode string -> os.open() flags.  Covers every raw mode string that
    # combines one of r/w/a with an optional '+'.
    _OPEN_FLAGS = {
        "r": os.O_RDONLY,
        "r+": os.O_RDWR,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "w+": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "a+": os.O_RDWR | os.O_CREAT | os.O_APPEND,
    }

    def __init__(self, filename, mode):
        """Open filename with the given raw mode.

        Raises:
          ValueError: if mode is not one of the supported mode strings.
          OSError: if os.open() fails.
        """
        try:
            flags = self._OPEN_FLAGS[mode]
        except (KeyError, TypeError):
            raise ValueError("unsupported mode %r (for now)" % (mode,))
        if hasattr(os, "O_BINARY"):
            # Platforms that define O_BINARY need it to avoid newline
            # translation on raw byte streams.
            flags |= os.O_BINARY
        self._seekable = None  # Computed lazily by seekable()
        self._mode = mode
        self._fd = os.open(filename, flags)

    def read(self, n):
        """Read and return up to n bytes straight from the descriptor."""
        return os.read(self._fd, n.__index__())

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read.

        Only the leading part of b is overwritten, so b keeps its size
        on a short read (b may therefore be any writable buffer).
        """
        # XXX We really should have os.readinto()
        data = os.read(self._fd, len(b))
        count = len(data)
        b[:count] = data
        return count

    def write(self, b):
        """Write b to the descriptor; return the number of bytes written."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        """Move the file position and return the new absolute position."""
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current absolute file position."""
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Resize the file to pos bytes (default: the current position)."""
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self._fd)

    def readable(self):
        """Return True if the mode allows reading."""
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        """Return True if the mode allows writing."""
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Return True if the descriptor supports lseek().

        The answer is probed once with a no-op lseek() and then cached.
        """
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        """Return the underlying file descriptor."""
        return self._fd
| 221 | |
| 222 | |
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Reads, writes and close are delegated to the wrapped socket object;
    the mode string decides what readable() and writable() report.
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        self._sock, self._mode = sock, mode
        self._seekable = False
        self._readable = "r" in mode
        self._writable = "w" in mode

    def readinto(self, b):
        """Receive into b via the socket's recv_into(); return its result."""
        received = self._sock.recv_into(b)
        return received

    def write(self, b):
        """Send b via the socket's send(); return its result."""
        sent = self._sock.send(b)
        return sent

    def close(self):
        """Close the wrapped socket."""
        self._sock.close()

    def readable(self):
        """Return True if the mode string contains 'r'."""
        mode = self._mode
        return "r" in mode

    def writable(self):
        """Return True if the mode string contains 'w'."""
        mode = self._mode
        return "w" in mode

    # XXX(nnorwitz)??? def fileno(self): return self._sock.fileno()
| 253 | |
Guido van Rossum | 28524c7 | 2007-02-27 05:47:44 +0000 | [diff] [blame] | 254 | |
class BytesIO(RawIOBase):

    """Raw I/O implementation for bytes, like StringIO.

    The data lives in an in-memory bytearray together with a current
    position; all operations are readable, writable and seekable.
    """

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes."""
        # A bytearray is required: write() and truncate() modify the
        # buffer in place.
        self._buffer = bytearray()
        self._pos = 0
        if inital_bytes is not None:
            self._buffer += inital_bytes

    def getvalue(self):
        """Return a bytes snapshot of the whole buffer."""
        return bytes(self._buffer)

    def read(self, n):
        """Read up to n bytes from the current position and advance it."""
        assert n >= 0
        # max() keeps the position from moving backwards when it is
        # already past the end of the data (after a seek beyond EOF).
        newpos = max(self._pos, min(len(self._buffer), self._pos + n))
        data = bytes(self._buffer[self._pos:newpos])
        self._pos = newpos
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read.

        Only the leading part of b is overwritten; b is never resized.
        """
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def write(self, b):
        """Write b at the current position; return the number of bytes.

        Existing data is overwritten; if the position lies beyond the
        end of the buffer the gap is filled with zero bytes first.
        """
        n = len(b)
        if n == 0:
            return 0
        gap = self._pos - len(self._buffer)
        if gap > 0:
            self._buffer += bytearray(gap)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end); the resulting position is clamped at zero.

        Raises:
          IOError: if whence is not 0, 1 or 2.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop all data from pos onwards (default: current position).

        An explicit pos is clamped at zero and also becomes the new
        current position.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp once and use the clamped value for both the position
            # and the deletion; a negative index would otherwise count
            # from the end of the buffer.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def readable(self):
        """Return True: the buffer can always be read."""
        return True

    def writable(self):
        """Return True: the buffer can always be written."""
        return True

    def seekable(self):
        """Return True: the buffer supports random access."""
        return True