blob: 14178d766c507676e22f06e29fa86a75a6aeb765 [file] [log] [blame]
Christian Heimes1a6387e2008-03-26 12:49:49 +00001"""
Benjamin Peterson7bb4d2d2008-04-13 02:01:27 +00002The io module provides the Python interfaces to stream handling. The
3builtin open function is defined in this module.
4
At the top of the I/O hierarchy is the abstract base class IOBase. It
defines the basic interface to a stream. Note, however, that there is no
separation between reading and writing to streams; implementations are
allowed to throw an IOError if they do not support a given operation.
9
10Extending IOBase is RawIOBase which deals simply with the reading and
11writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
12an interface to OS files.
13
14BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
15subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
16streams that are readable, writable, and both respectively.
17BufferedRandom provides a buffered interface to random access
18streams. BytesIO is a simple stream of in-memory bytes.
19
Another IOBase subclass, TextIOBase, deals with the encoding and decoding
of streams into text. TextIOWrapper, which extends it, is a buffered text
interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.
24
25Argument names are not part of the specification, and only the arguments
26of open() are intended to be used as keyword arguments.
27
28data:
29
30DEFAULT_BUFFER_SIZE
31
32 An int containing the default buffer size used by the module's buffered
33 I/O classes. open() uses the file's blksize (as obtained by os.stat) if
34 possible.
35"""
36# New I/O library conforming to PEP 3116.
37
38# This is a prototype; hopefully eventually some of this will be
39# reimplemented in C.
40
41# XXX edge cases when switching between reading/writing
42# XXX need to support 1 meaning line-buffered
43# XXX whenever an argument is None, use the default value
44# XXX read/write ops should check readable/writable
45# XXX buffered readinto should work with arbitrary buffer objects
46# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
47# XXX check writable, readable and seekable in appropriate places
Christian Heimes3784c6b2008-03-26 23:13:59 +000048from __future__ import print_function
49from __future__ import unicode_literals
Christian Heimes1a6387e2008-03-26 12:49:49 +000050
# Original authors of the prototype implementation.
__author__ = (
    "Guido van Rossum <guido@python.org>, "
    "Mike Verdone <mike.verdone@gmail.com>, "
    "Mark Russell <mark.russell@zen.co.uk>"
)

# Names exported by ``from io import *``.
__all__ = [
    "BlockingIOError",
    "open",
    "IOBase",
    "RawIOBase",
    "FileIO",
    "BytesIO",
    "StringIO",
    "BufferedIOBase",
    "BufferedReader",
    "BufferedWriter",
    "BufferedRWPair",
    "BufferedRandom",
    "TextIOBase",
    "TextIOWrapper",
]
59
60import os
61import abc
62import sys
63import codecs
64import _fileio
65import warnings
66
# Fallback buffer size, in bytes.  open() prefers the file's st_blksize
# whenever it can obtain one.
DEFAULT_BUFFER_SIZE = 8 * 1024

# py3k has only new-style classes; make bare ``class X:`` statements in
# this module new-style as well.
__metaclass__ = type
72
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        # Remember how much data was written before the stream would have
        # blocked; IOError itself keeps the (errno, strerror) pair.
        self.characters_written = characters_written
        IOError.__init__(self, errno, strerror)
80
81
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""Open file and return a stream.  If the file cannot be opened, an
    IOError is raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped. (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy. By
    default full buffering is on. Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    an invalid mode, an invalid combination of arguments, or an invalid
    buffering value.  If anything fails after the raw file has been
    opened, the raw file is closed again before the exception propagates.
    """
    # --- Argument type checks -------------------------------------------
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing -----------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Open the raw file ------------------------------------------------
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    try:
        # --- Work out the buffering policy --------------------------------
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            # Line buffering: use a default-sized buffer underneath.
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size on this platform/file; keep default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                raw._name = file
                raw._mode = mode
                return raw
            raise ValueError("can't have unbuffered text I/O")

        # --- Wrap the raw file in the appropriate buffered object ---------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer

        # --- Text mode: add the decoding/encoding layer -------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except:
        # Do not leak the already-opened raw file when a later step fails;
        # the original exception is re-raised unchanged.
        raw.close()
        raise
262
263class _DocDescriptor:
264 """Helper for builtins.open.__doc__
265 """
266 def __get__(self, obj, typ):
267 return (
268 "open(file, mode='r', buffering=None, encoding=None, "
269 "errors=None, newline=None, closefd=True)\n\n" +
270 open.__doc__)
271
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the literal docstring above with one computed on access.
    __doc__ = _DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # "Instantiating" the wrapper just forwards everything to open()
        # and hands back whatever stream it returns.
        return open(*positional, **keywords)
284
285
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream is asked to perform an operation it lacks.

    Derives from both ValueError and IOError, so handlers for either
    exception type will catch it.
    """
288
289
class IOBase(object):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence = 0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos = None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed; a class-level default that close()
    # shadows on the instance.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit = -1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        if hasattr(self, "peek"):
            # With peek() available we can look ahead for the newline and
            # read up to it in one call instead of byte by byte.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                return 1
        # nreadahead() reads ``limit`` when it is called, so normalising
        # None here (before the loop) is early enough.
        if limit is None:
            limit = -1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.  A hint of None, zero or a negative
        number means "no hint": all remaining lines are returned.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines to the stream using write()."""
        self._checkClosed()
        for line in lines:
            self.write(line)
553
554
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n = -1):
        """Read and return up to n bytes.

        If n is omitted, None or negative, defer to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available right now: report
            # that as None rather than as b"", which would mean EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the accumulated bytes.  If nothing at all could be read,
        the result of the first read() is returned unchanged: b"" on EOF,
        or None when the stream would block.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # b"" (EOF) or None (would block)
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
608
609
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def close(self):
        # Run both parents' close() explicitly, in the same order as the
        # base-class list: the C-level file first, then RawIOBase.
        for parent in (_fileio._FileIO, RawIOBase):
            parent.close(self)

    @property
    def name(self):
        # Reads the private _name attribute; open() assigns it on the
        # unbuffered binary path.
        return self._name

    @property
    def mode(self):
        # Reads the private _mode attribute; open() assigns it on the
        # unbuffered binary path.
        return self._mode
630
631
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n = None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.  A TypeError from assigning into b is
        propagated unless b is an array.array.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError:
            import array
            if not isinstance(b, array.array):
                # Not a target we know how to adapt.  A bare raise keeps
                # the original traceback intact.
                raise
            # array slices only accept another array, so convert first.
            b[:n] = array.array('b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
702
703
704class _BufferedIOMixin(BufferedIOBase):
705
706 """A mixin implementation of BufferedIOBase with an underlying raw stream.
707
708 This passes most requests on to the underlying raw stream. It
709 does *not* provide implementations of read(), readinto() or
710 write().
711 """
712
713 def __init__(self, raw):
714 self.raw = raw
715
716 ### Positioning ###
717
718 def seek(self, pos, whence=0):
719 return self.raw.seek(pos, whence)
720
721 def tell(self):
722 return self.raw.tell()
723
724 def truncate(self, pos=None):
725 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
726 # and a flush may be necessary to synch both views of the current
727 # file state.
728 self.flush()
729
730 if pos is None:
731 pos = self.tell()
732 return self.raw.truncate(pos)
733
734 ### Flush and close ###
735
736 def flush(self):
737 self.raw.flush()
738
739 def close(self):
740 if not self.closed:
741 try:
742 self.flush()
743 except IOError:
744 pass # If flush() fails, just give up
745 self.raw.close()
746
747 ### Inquiries ###
748
749 def seekable(self):
750 return self.raw.seekable()
751
752 def readable(self):
753 return self.raw.readable()
754
755 def writable(self):
756 return self.raw.writable()
757
758 @property
759 def closed(self):
760 return self.raw.closed
761
762 ### Lower-level APIs ###
763
764 def fileno(self):
765 return self.raw.fileno()
766
767 def isatty(self):
768 return self.raw.isatty()
769
770
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The stream position starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        return bytes(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is omitted, None or negative, everything up to the end of
        the buffer is returned.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """this is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Raises ValueError if the stream is closed and TypeError if b is
        a unicode string.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        newpos = self._pos + n
        if newpos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (newpos - len(self._buffer) - n)
            self._buffer[self._pos:newpos - n] = padding
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new absolute position.

        whence is 0 (from the start), 1 (from the current position) or
        2 (from the end).  A resulting negative position is clamped to 0.

        Raises TypeError if pos has no __index__ method and IOError for
        any other whence value.
        """
        try:
            pos = pos.__index__()
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer off at pos (default: current position).

        Returns pos.  The stream position itself is not moved.

        Raises ValueError if pos is negative.
        """
        if pos is None:
            pos = self._pos
        elif pos < 0:
            # A negative index would silently slice relative to the end
            # of the buffer and delete the wrong bytes.
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
852
853
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        # Bytes already fetched from raw but not yet handed to the caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.

        When nothing at all could be read, the raw stream's own "no data"
        value is passed through: b"" or None, whichever raw.read() gave.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # n is always an int here (None was normalised above), so the
            # request size is simply the larger of buffer_size and n; for
            # a read-to-EOF (n < 0) that means buffer_size sized requests.
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it, and only when fewer than
        min(n, self.buffer_size) bytes are buffered.  Everything that is
        currently buffered is returned, which may be more than n bytes.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        return self._read_buf

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n <= 0:
            return b""
        self.peek(1)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: the raw position minus read-ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the new position.

        For relative seeks (whence == 1) the offset is corrected by the
        amount of buffered read-ahead, because the raw stream is that far
        ahead of the logical position.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
935
936
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream with an initially empty write buffer."""
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Bytes accepted from callers but not yet written to raw.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer b for writing and return the number of bytes accepted.

        The buffer is flushed to the raw stream once it holds more than
        buffer_size bytes.

        Raises ValueError if the stream is closed, TypeError if b is a
        unicode string, and BlockingIOError if the raw stream would block
        and the data does not fit within max_buffer_size; in that case
        characters_written is the number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # The bytes cut off are the tail of b, so they must not
                    # be counted as accepted: report what was kept, not
                    # what was dropped.
                    written -= overage
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Write buffered bytes to the raw stream until the buffer is empty.

        Raises ValueError if the stream is closed.  If the raw stream
        raises BlockingIOError, the bytes it did accept are removed from
        the buffer and a new BlockingIOError is raised whose
        characters_written is the total flushed by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: the raw position plus unflushed bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending writes, then seek the raw stream and return its result."""
        self.flush()
        return self.raw.seek(pos, whence)
1007
1008
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1 (read all)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, taken from the writer side."""
        # `closed` is a property on the buffered classes, not a method;
        # calling it would raise TypeError ('bool' object is not callable).
        return self.writer.closed
1073
1074
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialise both the reader and the writer half over the same raw stream."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream and drop the read buffer.

        Returns the new position as reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead still buffered, so a relative offset
            # must be corrected by that amount (as BufferedReader.seek
            # does).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for whichever buffer is in use."""
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader.read() does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard any read-ahead, then write as BufferedWriter.write() does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
1128
1129
class TextIOBase(IOBase):

    """Abstract base for text-oriented streams.

    Offers a character and line based view of stream I/O.  Because
    Python character strings are immutable there is no readinto method.
    This class has no public constructor.
    """

    def read(self, n = -1):
        """Read at most n characters from the stream.

        Data is taken from the underlying buffer until n characters are
        available or EOF is hit; a negative or omitted n means read
        until EOF.  Not implemented at this level.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream.  Not implemented at this level."""
        self._unsupported("write")

    def truncate(self, pos = None):
        """Cut the stream off at pos (default: the current position)."""
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        underlying = self.buffer
        return underlying.truncate()

    def readline(self):
        """Read up to and including the next newline, or to EOF.

        An empty string means EOF was hit immediately.  Not implemented
        at this level.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far (reading only).

        None here; subclasses should override.
        """
        return None
1180
1181
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Incremental decoder wrapper used for universal newlines reading.

    Wraps another incremental decoder.  When translate is true, \\r\\n
    and \\r in the decoded text become \\n.  The kinds of newline seen
    are recorded either way, and a trailing \\r is always held back
    until the next call so that a \\r\\n pair is never split in two.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.seennl = 0
        self.buffer = b''

    def decode(self, input, final=False):
        # Prepend the \r that was held back by the previous call, if any.
        held = self.buffer
        if held:
            input = held + input

        text = self.decoder.decode(input, final=final)

        # Hold back a trailing \r (even when not translating) so that
        # readline() is guaranteed to see \r\n in a single piece.
        carry = b''
        if not final and text.endswith("\r"):
            text = text[:-1]
            carry = b'\r'
        self.buffer = carry

        # Work out which newline flavours occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = self.seennl
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl = seen

        if not self.translate:
            return text
        if n_crlf:
            text = text.replace("\r\n", "\n")
        if n_cr:
            text = text.replace("\r", "\n")
        return text

    def getstate(self):
        # The held-back \r is reported as part of the pending input.
        inner_buf, inner_flag = self.decoder.getstate()
        return inner_buf + self.buffer, inner_flag

    def setstate(self, state):
        pending, flag = state
        # Split a trailing \r off again; it belongs to this wrapper.
        if pending.endswith(b'\r'):
            pending = pending[:-1]
            self.buffer = b'\r'
        else:
            self.buffer = b''
        self.decoder.setstate((pending, flag))

    def reset(self):
        self.buffer = b''
        self.seennl = 0
        self.decoder.reset()

    @property
    def newlines(self):
        # Indexed by the seennl bit mask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1259
1260
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. If it is not given, the device encoding of
    the buffer's file descriptor is tried first and then
    locale.getpreferredencoding().

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer per _read_chunk() call.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap buffer; see the class docstring for the arguments.

        Raises ValueError for an illegal newline value or when encoding
        or errors is not a string.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # First choice: the encoding of the device behind the buffer.
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                # Second choice: the locale's preferred encoding.
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        # Universal newline detection applies for newline None and ''.
        self._readuniversal = not newline
        # Translation to '\n' on input only happens for newline None.
        self._readtranslate = newline is None
        self._readnl = newline
        # '\n' is rewritten on output unless newline is ''.
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        # Encoder and decoder are created lazily on first use.
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        """Name of the encoding chosen at construction time."""
        return self._encoding

    @property
    def errors(self):
        """Error handling scheme chosen at construction time."""
        return self._errors

    @property
    def line_buffering(self):
        """The line_buffering flag given to the constructor."""
        return self._line_buffering

    def seekable(self):
        """Return the buffer's seekable() result as cached by the constructor."""
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush on a best-effort basis, then close the underlying buffer."""
        try:
            self.flush()
        except:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        """The closed flag of the underlying buffer."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's fileno()."""
        return self.buffer.fileno()

    def isatty(self):
        """Return the underlying buffer's isatty()."""
        return self.buffer.isatty()

    def write(self, s):
        """Encode the unicode string s and write it to the buffer.

        Returns len(s) as passed in, i.e. before any newline translation.
        Raises ValueError if the stream is closed and TypeError if s is
        not a unicode string.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        # Only scan for "\n" when the answer is actually needed.
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side snapshot and decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, cache and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, cache and return the incremental decoder.

        In universal newlines mode the decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer.

        Returns up to n unread characters (all of them if n is None) and
        marks them as used.
        """
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer.

        Marks the last n used characters as unread again.  Raises
        AssertionError if fewer than n characters have been used.
        """
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.

        The return value is True unless EOF was reached.  The decoded string
        is placed in self._decoded_chars (replacing its previous value).
        The entire input chunk is sent to the decoder, though some of it
        may remain buffered in the decoder, yet to be converted.
        """

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        """Combine the pieces of a tell() position into one integer.

        Each of position, dec_flags, bytes_to_feed and chars_to_skip
        occupies its own 64-bit field (in that order, lowest bits first);
        need_eof is stored as a single bit above them.
        """
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Split a cookie made by _pack_cookie() back into its five fields.

        Returns (position, dec_flags, bytes_to_feed, need_eof,
        chars_to_skip).
        """
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque integer cookie for the current position.

        When no decoder state is pending the cookie is simply the byte
        position of the underlying buffer; otherwise it is built by
        _pack_cookie() and is only meaningful to seek().

        Raises IOError if the stream is not seekable, if telling has been
        disabled by next(), or if the position cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # The walk above was only a simulation; put the real decoder
            # state back whatever happened.
            decoder.setstate(saved_state)

    def seek(self, cookie, whence=0):
        """Reposition the stream and return the resulting position.

        With whence 0, cookie must be a non-negative value obtained from
        tell().  With whence 1 or 2 only a zero offset is accepted:
        whence 1 re-syncs to the current position, whence 2 moves to the
        end of the stream.

        Raises IOError if the stream is not seekable, for a non-zero
        relative offset, or if the position cannot be restored, and
        ValueError for an invalid whence or a negative cookie.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return up to n characters.

        If n is None or negative, everything up to EOF is decoded and
        returned.  Fewer than n characters are returned only when EOF is
        reached first.
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Return the next line, raising StopIteration at EOF.

        tell() is disabled while iterating (no snapshots are taken by
        _read_chunk); it is re-enabled once EOF is reached or flush() is
        called.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, including its line ending.

        If limit is given and non-negative, at most limit characters are
        returned.  At EOF whatever text remains (possibly the empty
        string) is returned.
        """
        if limit is None:
            limit = -1

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                # NOTE(review): unlike the branches above this search
                # restarts from the beginning of line on every pass.
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # NOTE(review): more_line is assigned but never read.
            more_line = ''
            # Keep reading until a chunk yields decoded text or EOF is hit
            # (a chunk can decode to nothing if it ends mid-character).
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """The decoder's newlines attribute, or None before any decoder exists."""
        return self._decoder.newlines if self._decoder else None
1717
class StringIO(TextIOWrapper):

    """Text stream held entirely in memory.

    initial_value provides the starting contents of the stream.  The
    remaining arguments have the same meaning as for TextIOWrapper's
    constructor.
    """

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        # The text layer sits on top of a fresh in-memory byte stream.
        backing = BytesIO()
        super(StringIO, self).__init__(backing,
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, unicode):
            text = unicode(text)
        # Store the initial contents, then rewind to the start.
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the stream as decoded text."""
        self.flush()
        encoded = self.buffer.getvalue()
        return encoded.decode(self._encoding, self._errors)