blob: 213b0fc760793b12d392d2ad321649a2dbcd8318 [file] [log] [blame]
Christian Heimes1a6387e2008-03-26 12:49:49 +00001"""
Benjamin Peterson7bb4d2d2008-04-13 02:01:27 +00002The io module provides the Python interfaces to stream handling. The
3builtin open function is defined in this module.
4
At the top of the I/O hierarchy is the abstract base class IOBase. It
defines the basic interface to a stream. Note, however, that there is no
separation between reading and writing to streams; implementations are
allowed to throw an IOError if they do not support a given operation.
9
10Extending IOBase is RawIOBase which deals simply with the reading and
11writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
12an interface to OS files.
13
14BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
15subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
16streams that are readable, writable, and both respectively.
17BufferedRandom provides a buffered interface to random access
18streams. BytesIO is a simple stream of in-memory bytes.
19
Another IOBase subclass, TextIOBase, deals with the encoding and decoding
of streams into text. TextIOWrapper, which extends it, is a buffered text
interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.
24
25Argument names are not part of the specification, and only the arguments
26of open() are intended to be used as keyword arguments.
27
28data:
29
30DEFAULT_BUFFER_SIZE
31
32 An int containing the default buffer size used by the module's buffered
33 I/O classes. open() uses the file's blksize (as obtained by os.stat) if
34 possible.
35"""
36# New I/O library conforming to PEP 3116.
37
38# This is a prototype; hopefully eventually some of this will be
39# reimplemented in C.
40
41# XXX edge cases when switching between reading/writing
42# XXX need to support 1 meaning line-buffered
43# XXX whenever an argument is None, use the default value
44# XXX read/write ops should check readable/writable
45# XXX buffered readinto should work with arbitrary buffer objects
46# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
47# XXX check writable, readable and seekable in appropriate places
Christian Heimes3784c6b2008-03-26 23:13:59 +000048from __future__ import print_function
49from __future__ import unicode_literals
Christian Heimes1a6387e2008-03-26 12:49:49 +000050
# Credits for the module (informational only; not used by the code here).
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Public names exported by "from io import *".
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
59
60import os
61import abc
62import sys
63import codecs
64import _fileio
65import warnings
66
# Fallback buffer size for the buffered classes. open() replaces it with
# the file's st_blksize whenever os.fstat() reports a usable value.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# py3k has only new style classes; make bare "class X:" statements in this
# module new-style as well.
__metaclass__ = type
72
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    errno and strerror are forwarded to IOError; characters_written is
    stored on the instance under the same name and defaults to 0.
    """

    def __init__(self, errno, strerror, characters_written=0):
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
80
81
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""
    Open file and return a stream. If the file cannot be opened, an
    IOError is raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped. (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy. By
    default full buffering is on. Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    # The offending value is wrapped in a 1-tuple so that a tuple argument
    # is reported correctly instead of breaking the %-formatting.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % (errors,))

    # --- Mode parsing ---------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer --------------------------------------------------------
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)

    # From here on the raw file is open. If building the upper layers
    # fails (for instance unbuffered text I/O was requested), close it
    # before propagating the error instead of leaking it.
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size; keep DEFAULT_BUFFER_SIZE.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                raw._name = file
                raw._mode = mode
                return raw
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer

        # --- Text layer ---------------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except:
        # Deliberately catch everything: the exception is always re-raised
        # after the cleanup.
        raw.close()
        raise
263
class _DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is computed on each access: the signature line
    of open() followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)")
        return signature + "\n\n" + open.__doc__
272
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    "Instantiating" this class never creates an OpenWrapper: __new__
    forwards all arguments to open() and returns its result.

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the docstring above at runtime with the text built by
    # _DocDescriptor.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Delegate straight to the module-level open().
        return open(*args, **kwargs)
285
286
class UnsupportedOperation(ValueError, IOError):
    """Raised by IOBase._unsupported() for operations a stream lacks.

    Derives from both ValueError and IOError, so handlers for either one
    catch it.
    """
289
290
class IOBase(object):

    """
    The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence = 0):
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """tell() -> int.  Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos = None):
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """flush() -> None.  Flushes write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it by
    # accident.
    __closed = False

    def close(self):
        """close() -> None.  Flushes and closes the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit = -1):
        r"""readline(limit: int = -1) -> bytes  Read and return a line from the
        stream.

        If limit is specified, at most limit bytes will be read.
        A limit of None or a negative limit means "no limit".

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        if limit is None:
            limit = -1
        res = bytearray()
        if hasattr(self, "peek"):
            # Use peek() to find how much can be read in one go without
            # running past the end of the line.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Never ask for more than what is still allowed;
                    # bytes already collected count against the limit.
                    n = min(n, limit - len(res))
                return n
        else:
            # Without peek() we must go byte by byte to avoid consuming
            # data that belongs to the next line.
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol.  Returns self (the stream yields its lines)."""
        self._checkClosed()
        return self

    def next(self):
        """Return the next line; raise StopIteration once readline() is empty."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """readlines(hint=None) -> list  Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write every item of lines with write(); no separators are added."""
        self._checkClosed()
        for line in lines:
            self.write(line)
556
557
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n = -1):
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, defers to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data available right now"; pass
            # that on instead of turning it into b"" (which means EOF).
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() call.

        Returns None if the very first read() reports that no data is
        available (non-blocking stream); data collected before such a
        report is returned as is.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if data is None and not res:
            # Nothing read and the stream would block: not an EOF.
            return None
        return bytes(res)

    def readinto(self, b):
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
611
612
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def close(self):
        """Close the file through both base classes."""
        # First the C-level close, then RawIOBase.close() so that the
        # Python-level "closed" bookkeeping of IOBase is updated as well.
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """Read-only view of self._name."""
        # NOTE(review): in this module _name is only assigned by open() on
        # the unbuffered binary path; whether _FileIO sets it too cannot
        # be seen from here.
        return self._name

    @property
    def mode(self):
        """Read-only view of self._mode (same caveat as for name)."""
        return self._mode
633
634
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n = None):
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def readinto(self, b):
        """readinto(b: bytearray) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError:
            import array
            if not isinstance(b, array.array):
                # Bare raise keeps the traceback of the failed assignment.
                raise
            # This module uses unicode_literals, but array.array() wants a
            # native str typecode; str() yields one.
            b[:n] = array.array(str("b"), data)
        return n

    def write(self, b):
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
705
706
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Almost everything is delegated to the wrapped raw stream, stored as
    self.raw.  read(), readinto() and write() are *not* provided here;
    concrete subclasses supply them.
    """

    def __init__(self, raw):
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (best effort) and close the raw stream; no-op if closed."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Report raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Report raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Report raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """True iff the raw stream reports itself closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Report raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Report raw.isatty()."""
        return self.raw.isatty()
772
773
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The contents are copied into a private bytearray; the position
        starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def getvalue(self):
        """getvalue() -> bytes Return the bytes value (contents) of the buffer
        """
        return bytes(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is None or negative, everything up to the end of the buffer
        is returned.  Returns b"" when the position is at or beyond the
        end; the position is left untouched in that case.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            # At or past EOF (seek() may move beyond the end).  Returning
            # here keeps the position from being pulled back to the end
            # of the buffer.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """In BytesIO, this is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing bytes are overwritten and the buffer grows as needed.
        If the position is beyond the end, the gap is filled with null
        bytes first.  Raises ValueError on a closed stream and TypeError
        for unicode input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        newpos = self._pos + n
        if self._pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * (self._pos - len(self._buffer))
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end); any other value raises IOError.  Results below zero are
        clamped to 0.  pos must support __index__(), else TypeError.
        """
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") # from err
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  Raises ValueError for a negative pos; a negative
        slice index would otherwise silently keep part of the data.
        """
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
855
856
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is None:
            n = -1
        # What to hand back if nothing at all is available: b"" for EOF,
        # None if the raw stream reported "would block".
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # n is an int here (None was normalised above), so each raw
            # read asks for at least a full buffer.
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if not self._read_buf:
            return nodata_val
        if n < 0:
            n = len(self._read_buf)
        out = self._read_buf[:n]
        self._read_buf = self._read_buf[n:]
        return out

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            current = self.raw.read(self.buffer_size - have)
            if current:
                self._read_buf += current
        return self._read_buf

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n <= 0:
            return b""
        self.peek(1)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the raw position minus the bytes still held in the buffer."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position."""
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of buffered data; compensate for relative seeks.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
938
939
class BufferedWriter(_BufferedIOMixin):

    """BufferedWriter(raw[, buffer_size[, max_buffer_size]])

    A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw stream."""
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Bytes accepted from callers but not yet written to raw.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer the bytes in b and return how many were accepted.

        Raises ValueError if the stream is closed and TypeError if b is a
        unicode string.  The buffer is flushed once it grows beyond
        buffer_size.  If the raw stream would block, BlockingIOError is
        raised with characters_written set to the number of bytes of b
        that were actually accepted into the buffer.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # Report the bytes of b we kept, not the bytes we
                    # dropped: the caller uses this to know where to
                    # resume writing.
                    written -= overage
                    del self._write_buf[self.max_buffer_size:]
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Write the buffered bytes to the raw stream.

        Raises ValueError if the stream is closed.  If the raw stream
        raises BlockingIOError, the bytes it reports as written are removed
        from the buffer and a new BlockingIOError is raised whose
        characters_written is the total flushed by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending bytes, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
1012
1013
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read until EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate readinto() to the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate write() to the writer side."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate peek() to the reader side."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate read1() to the reader side."""
        return self.reader.read1(n)

    def readable(self):
        """Return whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Return whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both sides, writer first.

        The reader is closed even if closing the writer raises, so a
        failed final flush does not leak the read side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side is connected to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """True once the writer side has been closed."""
        # 'closed' is an attribute/property on the buffered objects, not a
        # method; calling it would raise TypeError.
        return self.writer.closed
1078
1079
class BufferedRandom(BufferedWriter, BufferedReader):

    """BufferedRandom(raw[, buffer_size[, max_buffer_size]])

    A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the read and the write buffer on the seekable raw."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the new absolute position reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead, so a relative offset must be corrected
            # (the same adjustment BufferedReader.seek makes).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for whichever buffer
        currently holds data."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then read1 as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard any read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
1135
1136
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented view of a stream.  Because
    Python character strings are immutable there is no readinto method,
    and there is no public constructor.
    """

    def read(self, n = -1):
        """read(n: int = -1) -> unicode.  Read at most n characters from stream.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """write(s: unicode) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos = None):
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        Flushes first; when pos is None the current position is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self):
        """readline() -> unicode.  Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Name of the encoding; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """newlines -> None | unicode | tuple of unicode. Line endings translated
        so far.

        Only line endings translated during reading are considered.

        None here; subclasses should override.
        """
        return None
1188
1189
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Codec used when reading a file in universal newlines mode.
    It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
    It also records the types of newlines encountered.
    When used with translate=False, it ensures that the newline sequence is
    returned in one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        """Wrap decoder; translate says whether to map \\r\\n and \\r to \\n."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        # Bitmask of the newline kinds seen so far (_LF, _CR, _CRLF).
        self.seennl = 0
        # True when a trailing '\r' was decoded but held back until we know
        # whether a '\n' follows.  It is kept as a flag on the *decoded*
        # side: re-feeding a raw b'\r' to the wrapped decoder is wrong for
        # encodings where '\r' is not the single byte 0x0D (e.g. UTF-16).
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input and return text with newline handling applied.

        A trailing '\\r' is withheld unless final is true, so that a
        '\\r\\n' pair split across calls is always delivered together.
        """
        output = self.decoder.decode(input, final=final)

        # Re-attach the '\r' held back by a previous pass once there is
        # something to attach it to (or the input is finished).
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        """Return (buffered_bytes, flags) for later use with setstate().

        The wrapped decoder's flags are shifted left by one bit and the
        lowest bit records whether a '\\r' is pending.
        """
        buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending '\\r'; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bitmask.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]
1267
1268
class TextIOWrapper(TextIOBase):

    r"""TextIOWrapper(buffer[, encoding[, errors[, newline[, line_buffering]]]])

    Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Validate the arguments and set up encoder/decoder bookkeeping.

        Raises ValueError for an illegal newline value or when encoding or
        errors is not a string.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        """The encoding name chosen in the constructor."""
        return self._encoding

    @property
    def errors(self):
        """The error-handling scheme chosen in the constructor."""
        return self._errors

    @property
    def line_buffering(self):
        """Whether write() flushes when the text contains a newline."""
        return self._line_buffering

    def seekable(self):
        """Return whether the underlying buffer reported itself seekable."""
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            # If flush() fails, just give up.  Only ordinary errors are
            # swallowed; KeyboardInterrupt/SystemExit still propagate.
            pass
        self.buffer.close()

    @property
    def closed(self):
        """True when the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's file descriptor."""
        return self.buffer.fileno()

    def isatty(self):
        """Return whether the underlying buffer is a tty."""
        return self.buffer.isatty()

    def write(self, s):
        """Encode s and write it to the buffer; return len(s) as passed in.

        Raises ValueError if the stream is closed and TypeError if s is not
        a unicode string.  '\\n' is replaced by the configured output newline
        when translation is enabled.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side snapshot and decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, cache and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, cache and return the incremental decoder.

        In universal-newlines mode the decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.

        The return value is True unless EOF was reached.  The decoded string
        is placed in self._decoded_chars (replacing its previous value).
        The entire input chunk is sent to the decoder, though some of it
        may remain buffered in the decoder, yet to be converted.
        """

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        """Pack the five tell() components into one integer cookie."""
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Split a cookie from _pack_cookie back into its five fields."""
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie describing the current text position.

        Raises IOError if the stream is not seekable, if telling is
        currently disabled by next(), or if the position cannot be
        reconstructed.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Always put the live decoder back the way we found it.
            decoder.setstate(saved_state)

    def seek(self, cookie, whence=0):
        """Reposition the stream using a cookie from tell().

        whence 1 and 2 are only supported with a zero cookie.  Raises
        IOError for an unseekable stream, for nonzero relative seeks, or
        when the position cannot be restored; raises ValueError for an
        invalid whence or a negative cookie.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return at most n characters; all of them if n is
        None or negative."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Return the next line, raising StopIteration at EOF.

        tell() is disabled while iterating and re-enabled (for seekable
        streams) once the iteration is exhausted.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most limit characters long when
        limit is non-negative.  Returns what is left at EOF."""
        if limit is None:
            limit = -1

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """The decoder's newlines attribute, or None before any decoder
        has been created."""
        return self._decoder.newlines if self._decoder else None
1728
class StringIO(TextIOWrapper):

    """StringIO([initial_value[, encoding, [errors, [newline]]]])

    A text stream held entirely in memory.  initial_value, when given,
    becomes the starting contents of the object; the remaining arguments
    are passed on to TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        """Wrap a fresh BytesIO and preload it with initial_value."""
        super(StringIO, self).__init__(BytesIO(),
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        if not initial_value:
            return
        # Coerce non-unicode input before writing it through the wrapper.
        if not isinstance(initial_value, unicode):
            initial_value = unicode(initial_value)
        self.write(initial_value)
        # Leave the stream positioned at the start.
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the buffer, decoded to text."""
        self.flush()
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode(self._encoding, self._errors)