blob: 1cf9a18e6f5ec01f7912ea4a51cc7614af035168 [file] [log] [blame]
Christian Heimes1a6387e2008-03-26 12:49:49 +00001"""
Benjamin Peterson7bb4d2d2008-04-13 02:01:27 +00002The io module provides the Python interfaces to stream handling. The
3builtin open function is defined in this module.
4
5At the top of the I/O hierarchy is the abstract base class IOBase. It
6defines the basic interface to a stream. Note, however, that there is no
Mark Dickinson3e4caeb2009-02-21 20:27:01 +00007separation between reading and writing to streams; implementations are
Benjamin Peterson7bb4d2d2008-04-13 02:01:27 +00008allowed to throw an IOError if they do not support a given operation.
9
10Extending IOBase is RawIOBase which deals simply with the reading and
11writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
12an interface to OS files.
13
14BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
15subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
16streams that are readable, writable, and both respectively.
17BufferedRandom provides a buffered interface to random access
18streams. BytesIO is a simple stream of in-memory bytes.
19
20Another IOBase subclass, TextIOBase, deals with the encoding and decoding
21of streams into text. TextIOWrapper, which extends it, is a buffered text
interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.
24
25Argument names are not part of the specification, and only the arguments
26of open() are intended to be used as keyword arguments.
27
28data:
29
30DEFAULT_BUFFER_SIZE
31
32 An int containing the default buffer size used by the module's buffered
33 I/O classes. open() uses the file's blksize (as obtained by os.stat) if
34 possible.
35"""
36# New I/O library conforming to PEP 3116.
37
38# This is a prototype; hopefully eventually some of this will be
39# reimplemented in C.
40
41# XXX edge cases when switching between reading/writing
42# XXX need to support 1 meaning line-buffered
43# XXX whenever an argument is None, use the default value
44# XXX read/write ops should check readable/writable
45# XXX buffered readinto should work with arbitrary buffer objects
46# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
47# XXX check writable, readable and seekable in appropriate places
Christian Heimes3784c6b2008-03-26 23:13:59 +000048from __future__ import print_function
49from __future__ import unicode_literals
Christian Heimes1a6387e2008-03-26 12:49:49 +000050
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Public names exported by ``from io import *``.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
59
60import os
61import abc
Christian Heimes1a6387e2008-03-26 12:49:49 +000062import codecs
63import _fileio
Antoine Pitrou11ec65d2008-08-14 21:04:30 +000064import threading
Christian Heimes1a6387e2008-03-26 12:49:49 +000065
# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# for seek()
SEEK_SET = 0
SEEK_CUR = 1
SEEK_END = 2

# py3k has only new style classes
__metaclass__ = type
76
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    The characters_written attribute holds the value passed to the
    constructor (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
84
85
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""Open file and return a stream. If the file cannot be opened, an
    IOError is raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped. (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy. By
    default full buffering is on. Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.  With buffering=0 in binary mode the raw FileIO
    object is returned directly.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    an invalid or inconsistent mode/argument combination.  If an error
    occurs after the underlying file has been opened, the raw file object
    is closed before the exception propagates.
    """
    # --- Argument type checks -------------------------------------------
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing ----------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Open the raw file and stack the requested layers on top --------
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # st_blksize is unavailable here; keep the default size.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffered = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffered = BufferedWriter(raw, buffering)
        elif reading:
            buffered = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            return buffered
        wrapper = TextIOWrapper(buffered, encoding, errors, newline,
                                line_buffering)
        wrapper.mode = mode
        return wrapper
    except:
        # Do not leak the already-opened raw file when a later step
        # fails; the original exception is re-raised unchanged.
        raw.close()
        raise
261
262class _DocDescriptor:
263 """Helper for builtins.open.__doc__
264 """
265 def __get__(self, obj, typ):
266 return (
267 "open(file, mode='r', buffering=None, encoding=None, "
268 "errors=None, newline=None, closefd=True)\n\n" +
269 open.__doc__)
270
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the literal docstring above with one computed on access.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper simply forwards to open() and
        # returns whatever stream object it produces.
        return open(*args, **kwargs)
283
284
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream does not support the requested operation.

    Derives from both ValueError and IOError, so handlers written for
    either exception type catch it.
    """
287
288
class IOBase(object):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it
    # by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.  A limit
        of None or a negative limit means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises ValueError if the stream is closed and TypeError if limit
        is not an integer.
        """
        self._checkClosed()
        if limit is None:
            limit = -1
        try:
            limit = limit.__index__()
        except AttributeError:
            raise TypeError("limit must be an integer")
        res = bytearray()
        if hasattr(self, "peek"):
            # Use the peeked data to read up to the next newline in one
            # call instead of one byte at a time.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap at what is still allowed, not at the full
                    # limit, so several reads can never exceed limit.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.  A hint of None, zero or a negative
        value reads all remaining lines.

        Raises TypeError if hint is not an integer.
        """
        if hint is None:
            hint = -1
        try:
            hint = hint.__index__()
        except AttributeError:
            raise TypeError("hint must be an integer")
        if hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        self._checkClosed()
        for line in lines:
            self.write(line)
559
560
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        If n is None or negative, read until EOF via readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data available right now"; pass
            # that through instead of turning it into b"" (EOF).
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the bytes read.  If nothing at all was read, returns the
        result of the last read() call: b"" at EOF, or None when the
        stream had no data available.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # Nothing was accumulated: distinguish EOF (b"") from "would
        # block" (None) by returning what read() itself returned.
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
614
615
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def __init__(self, name, mode="r", closefd=True):
        """Open name with the given mode and remember name for .name."""
        _fileio._FileIO.__init__(self, name, mode, closefd)
        self._name = name

    def close(self):
        """Close via _FileIO first, then run RawIOBase.close()."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The name argument exactly as it was passed to the constructor."""
        return self._name
636
Christian Heimes1a6387e2008-03-26 12:49:49 +0000637
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError:
            # array.array rejects slice assignment from bytes; retry
            # with the data converted to an array.  Anything else is a
            # genuine error, re-raised with its original traceback.
            import array
            if not isinstance(b, array.array):
                raise
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
708
709
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Store raw as the stream that requests are delegated to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell() and return its result."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream to pos bytes.

        pos defaults to the current position as reported by tell().
        Returns the result of raw.truncate().
        """
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the underlying raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (best effort) and close the raw stream.

        Does nothing if the stream is already closed.
        """
        if not self.closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Return raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Return raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Return raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """True iff the underlying raw stream reports itself closed."""
        return self.raw.closed

    @property
    def name(self):
        """The name attribute of the underlying raw stream."""
        return self.raw.name

    @property
    def mode(self):
        """The mode attribute of the underlying raw stream."""
        return self.raw.mode

    ### Lower-level APIs ###

    def fileno(self):
        """Return raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Return raw.isatty()."""
        return self.raw.isatty()
785
786
class _BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The contents live in a bytearray (``self._buffer``) and the current
    stream position is tracked in ``self._pos``.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Start with a copy of *initial_bytes* (if given) at position 0."""
        data = bytearray()
        if initial_bytes is not None:
            data += bytearray(initial_bytes)
        self._buffer = data
        self._pos = 0

    def getvalue(self):
        """Return the whole buffer contents as a bytes object."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Return up to *n* bytes from the current position.

        None or a negative *n* means "everything up to the end".  An empty
        bytes object is returned when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("argument must be an integer")
        size = len(self._buffer)
        if n < 0:
            n = size
        start = self._pos
        if size <= start:
            return b""
        stop = min(size, start + n)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, n):
        """Identical to read(n)."""
        return self.read(n)

    def write(self, b):
        """Write *b* at the current position and return its length.

        Writing past the current end first pads the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # Fill the hole between the old end of the data and the
            # position being written to.
            self._buffer += b'\x00' * gap
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence 0 is absolute (negative values rejected), 1 is relative to
        the current position and 2 relative to the end; for 1 and 2 the
        result is clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos = pos.__index__()
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from *pos* on, then seek to *pos*.

        *pos* defaults to the current position; the result of the seek is
        returned.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return self.seek(pos)

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
888
# Prefer the accelerated _bytesio implementation when it can be imported;
# otherwise fall back to the pure-Python _BytesIO defined above.
try:
    import _bytesio
except ImportError:
    BytesIO = _BytesIO
else:
    class BytesIO(_bytesio._BytesIO, BufferedIOBase):
        __doc__ = _bytesio._BytesIO.__doc__
898
Christian Heimes1a6387e2008-03-26 12:49:49 +0000899
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = threading.Lock()

    def _reset_read_buf(self):
        """Throw away all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None or negative, read until EOF or until read()
        would block.
        """
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold self._read_lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Read-everything case.  Every negative n is handled here (not
        # just -1): letting e.g. -2 fall through to the fast path below
        # would move _read_pos backwards and corrupt the buffer state.
        if n is None or n < 0:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            # If nothing at all was read, hand back the raw stream's own
            # "no data" value (b"" or None).
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold self._read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                # Drop the consumed prefix while appending the new data.
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n <= 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the raw position minus the not-yet-consumed read-ahead."""
        return self.raw.tell() - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read buffer.

        For a relative seek (whence == 1) the offset is corrected by the
        amount of unread buffered data, since the raw stream is that far
        ahead of the position seen by the caller.
        """
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
            return pos
Christian Heimes1a6387e2008-03-26 12:49:49 +00001023
1024
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream *raw* with an empty write buffer."""
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()
        self._write_lock = threading.Lock()

    def write(self, b):
        """Append *b* to the write buffer and return the bytes accepted.

        The buffer is flushed to the raw stream once it grows beyond
        buffer_size.  Raises ValueError on a closed file, TypeError for
        unicode input, and BlockingIOError when the raw stream cannot take
        the data; its characters_written attribute then holds the number
        of bytes of *b* that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.max_buffer_size:
                        # We've hit max_buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.max_buffer_size
                        # Report what was actually accepted from b, not
                        # the number of bytes being thrown away.
                        written -= overage
                        self._write_buf = self._write_buf[:self.max_buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at *pos*.

        *pos* defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold self._write_lock.

        Raises ValueError on a closed file.  A BlockingIOError from the raw
        stream is re-raised with the total number of bytes flushed so far.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for what the raw stream took before it blocked.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the amount of still-buffered data."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        with self._write_lock:
            self._flush_unlocked()
            return self.raw.seek(pos, whence)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001110
1111
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Forward to reader.readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Forward to writer.write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Forward to reader.peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Forward to reader.read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Forward to reader.readable()."""
        return self.reader.readable()

    def writable(self):
        """Forward to writer.writable()."""
        return self.writer.writable()

    def flush(self):
        """Forward to writer.flush()."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even when closing the writer raises, so a
        failing writer cannot leak the reader side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The writer's ``closed`` attribute."""
        return self.writer.closed
Christian Heimes1a6387e2008-03-26 12:49:49 +00001176
1177
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the read and the write buffer on the seekable *raw*."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        For a relative seek (whence == 1) the offset is corrected by the
        amount of unread read-ahead, exactly as BufferedReader.seek() does;
        otherwise the seek would start from the raw stream's position,
        which is ahead of the position the caller has actually reached.
        """
        self.flush()
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            # First do the raw seek, then empty the read buffer, so that
            # if the raw seek fails, we don't lose buffered data forever.
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at *pos* (default: the current position)."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            # Undo readahead: move the raw stream back by the unread
            # amount so the write lands at the caller's position.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1241
1242
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented interface to stream I/O.
    readinto() is absent because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, n = -1):
        """Read at most n characters from stream.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos = None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Always None here; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.
        Always None here; subclasses should override.
        """
        return None
1289
1290
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder.  With translate=True, \\r\\n and \\r
    are turned into \\n; with translate=False the text is passed through,
    but a trailing \\r is still held back so that a \\r\\n pair is always
    returned in one piece.  The kinds of newline seen are recorded and
    exposed through the ``newlines`` property.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode *input* with the wrapped decoder and post-process newlines."""
        text = self.decoder.decode(input, final=final)

        # Re-attach a \r held back by the previous call, once there is
        # something to attach it to (or the input is finished).
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if not final and text.endswith("\r"):
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline occurred in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker.

        The wrapped decoder's own flags are shifted up by one bit.
        """
        buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Inverse of getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget recorded newlines and any pending \\r; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    @property
    def newlines(self):
        """None, a newline string, or a tuple of them, selected by seennl."""
        by_flags = (None,
                    "\n",
                    "\r",
                    ("\r", "\n"),
                    "\r\n",
                    ("\n", "\r\n"),
                    ("\r", "\r\n"),
                    ("\r", "\n", "\r\n")
                    )
        return by_flags[self.seennl]
1365
1366
1367class TextIOWrapper(TextIOBase):
1368
Benjamin Peterson9ae080e2008-05-04 22:39:33 +00001369 r"""Character and line based layer over a BufferedIOBase object, buffer.
Benjamin Peterson7bb4d2d2008-04-13 02:01:27 +00001370
1371 encoding gives the name of the encoding that the stream will be
1372 decoded or encoded with. It defaults to locale.getpreferredencoding.
1373
1374 errors determines the strictness of encoding and decoding (see the
1375 codecs.register) and defaults to "strict".
1376
1377 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1378 handling of line endings. If it is None, universal newlines is
1379 enabled. With this enabled, on input, the lines endings '\n', '\r',
1380 or '\r\n' are translated to '\n' before being returned to the
1381 caller. Conversely, on output, '\n' is translated to the system
Mark Dickinson3e4caeb2009-02-21 20:27:01 +00001382 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson7bb4d2d2008-04-13 02:01:27 +00001383 legal values, that newline becomes the newline when the file is read
1384 and it is returned untranslated. On output, '\n' is converted to the
1385 newline.
1386
1387 If line_buffering is True, a call to flush is implied when a call to
1388 write contains a newline character.
Christian Heimes1a6387e2008-03-26 12:49:49 +00001389 """
1390
1391 _CHUNK_SIZE = 128
1392
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False):
    """Set up the text layer on top of *buffer*.

    Raises ValueError for an illegal *newline*, or when *encoding* or
    *errors* is not a string.  Without an explicit *encoding* the device
    encoding of buffer.fileno() is tried first, then
    locale.getpreferredencoding(), with "ascii" as the fallback when the
    locale module cannot be imported.
    """
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
    if encoding is None:
        try:
            import locale
        except ImportError:
            # Importing locale may fail if Python is being built
            encoding = "ascii"
        else:
            encoding = locale.getpreferredencoding()

    if not isinstance(encoding, basestring):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, basestring):
        raise ValueError("invalid errors: %r" % errors)

    self.buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors

    # Newline handling derived from the *newline* argument.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    # Codec objects are created lazily.
    self._encoder = None
    self._decoder = None

    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
1435
1436 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1437 # where dec_flags is the second (integer) item of the decoder state
1438 # and next_input is the chunk of input bytes that comes next after the
1439 # snapshot point. We use this to reconstruct decoder states in tell().
1440
1441 # Naming convention:
1442 # - "bytes_..." for integer variables that count input bytes
1443 # - "chars_..." for integer variables that count decoded characters
1444
@property
def encoding(self):
    """The value stored in self._encoding."""
    value = self._encoding
    return value
1448
@property
def errors(self):
    """The value stored in self._errors."""
    value = self._errors
    return value
1452
@property
def line_buffering(self):
    """The value stored in self._line_buffering."""
    value = self._line_buffering
    return value
1456
def seekable(self):
    """Return the cached self._seekable flag."""
    flag = self._seekable
    return flag
1459
def readable(self):
    """Forward to buffer.readable()."""
    underlying = self.buffer
    return underlying.readable()
1462
def writable(self):
    """Forward to buffer.writable()."""
    underlying = self.buffer
    return underlying.writable()
1465
def flush(self):
    """Flush the underlying buffer, then set _telling from _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
1469
def close(self):
    """Flush (best effort) and close the underlying buffer.

    An ordinary error raised by flush() is ignored so that the buffer is
    still closed.  Only Exception subclasses are ignored: KeyboardInterrupt
    and SystemExit propagate instead of being silently swallowed.
    """
    try:
        self.flush()
    except Exception:
        pass  # If flush() fails, just give up
    self.buffer.close()
1476
@property
def closed(self):
    """The underlying buffer's ``closed`` attribute."""
    underlying = self.buffer
    return underlying.closed
1480
@property
def name(self):
    """The underlying buffer's ``name`` attribute."""
    underlying = self.buffer
    return underlying.name
1484
def fileno(self):
    """Forward to buffer.fileno()."""
    underlying = self.buffer
    return underlying.fileno()
1487
def isatty(self):
    """Forward to buffer.isatty()."""
    underlying = self.buffer
    return underlying.isatty()
1490
def write(self, s):
    """Encode the unicode string *s*, write it to the buffer, return len(s).

    Raises ValueError on a closed file and TypeError when *s* is not
    unicode.  "\n" is replaced by self._writenl when write translation is
    on; with line buffering, text containing "\n" or "\r" triggers a
    flush.  The tell() snapshot is dropped and any decoder is reset.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, unicode):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    # The return value is the length *before* any newline translation.
    length = len(s)
    translating = self._writetranslate
    buffering = self._line_buffering
    haslf = (translating or buffering) and "\n" in s
    if haslf and translating and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder
    if not encoder:
        encoder = self._get_encoder()
    # XXX What if we were just reading?
    data = encoder.encode(s)
    self.buffer.write(data)
    if buffering and (haslf or "\r" in s):
        self.flush()
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return length
1511
1512 def _get_encoder(self):
1513 make_encoder = codecs.getincrementalencoder(self._encoding)
1514 self._encoder = make_encoder(self._errors)
1515 return self._encoder
1516
1517 def _get_decoder(self):
1518 make_decoder = codecs.getincrementaldecoder(self._encoding)
1519 decoder = make_decoder(self._errors)
1520 if self._readuniversal:
1521 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1522 self._decoder = decoder
1523 return decoder
1524
1525 # The following three methods implement an ADT for _decoded_chars.
1526 # Text returned from the decoder is buffered here until the client
1527 # requests it by calling our read() or readline() method.
1528 def _set_decoded_chars(self, chars):
1529 """Set the _decoded_chars buffer."""
1530 self._decoded_chars = chars
1531 self._decoded_chars_used = 0
1532
1533 def _get_decoded_chars(self, n=None):
1534 """Advance into the _decoded_chars buffer."""
1535 offset = self._decoded_chars_used
1536 if n is None:
1537 chars = self._decoded_chars[offset:]
1538 else:
1539 chars = self._decoded_chars[offset:offset + n]
1540 self._decoded_chars_used += len(chars)
1541 return chars
1542
1543 def _rewind_decoded_chars(self, n):
1544 """Rewind the _decoded_chars buffer."""
1545 if self._decoded_chars_used < n:
1546 raise AssertionError("rewind decoded_chars out of bounds")
1547 self._decoded_chars_used -= n
1548
1549 def _read_chunk(self):
1550 """
1551 Read and decode the next chunk of data from the BufferedReader.
1552
1553 The return value is True unless EOF was reached. The decoded string
1554 is placed in self._decoded_chars (replacing its previous value).
1555 The entire input chunk is sent to the decoder, though some of it
1556 may remain buffered in the decoder, yet to be converted.
1557 """
1558
1559 if self._decoder is None:
1560 raise ValueError("no decoder")
1561
1562 if self._telling:
1563 # To prepare for tell(), we need to snapshot a point in the
1564 # file where the decoder's input buffer is empty.
1565
1566 dec_buffer, dec_flags = self._decoder.getstate()
1567 # Given this, we know there was a valid snapshot point
1568 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1569
1570 # Read a chunk, decode it, and put the result in self._decoded_chars.
1571 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1572 eof = not input_chunk
1573 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1574
1575 if self._telling:
1576 # At the snapshot point, len(dec_buffer) bytes before the read,
1577 # the next input to be decoded is dec_buffer + input_chunk.
1578 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1579
1580 return not eof
1581
1582 def _pack_cookie(self, position, dec_flags=0,
1583 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1584 # The meaning of a tell() cookie is: seek to position, set the
1585 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1586 # into the decoder with need_eof as the EOF flag, then skip
1587 # chars_to_skip characters of the decoded result. For most simple
1588 # decoders, tell() will often just give a byte offset in the file.
1589 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1590 (chars_to_skip<<192) | bool(need_eof)<<256)
1591
1592 def _unpack_cookie(self, bigint):
1593 rest, position = divmod(bigint, 1<<64)
1594 rest, dec_flags = divmod(rest, 1<<64)
1595 rest, bytes_to_feed = divmod(rest, 1<<64)
1596 need_eof, chars_to_skip = divmod(rest, 1<<64)
1597 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1598
1599 def tell(self):
1600 if not self._seekable:
1601 raise IOError("underlying stream is not seekable")
1602 if not self._telling:
1603 raise IOError("telling position disabled by next() call")
1604 self.flush()
1605 position = self.buffer.tell()
1606 decoder = self._decoder
1607 if decoder is None or self._snapshot is None:
1608 if self._decoded_chars:
1609 # This should never happen.
1610 raise AssertionError("pending decoded text")
1611 return position
1612
1613 # Skip backward to the snapshot point (see _read_chunk).
1614 dec_flags, next_input = self._snapshot
1615 position -= len(next_input)
1616
1617 # How many decoded characters have been used up since the snapshot?
1618 chars_to_skip = self._decoded_chars_used
1619 if chars_to_skip == 0:
1620 # We haven't moved from the snapshot point.
1621 return self._pack_cookie(position, dec_flags)
1622
1623 # Starting from the snapshot position, we will walk the decoder
1624 # forward until it gives us enough decoded characters.
1625 saved_state = decoder.getstate()
1626 try:
1627 # Note our initial start point.
1628 decoder.setstate((b'', dec_flags))
1629 start_pos = position
1630 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1631 need_eof = 0
1632
1633 # Feed the decoder one byte at a time. As we go, note the
1634 # nearest "safe start point" before the current location
1635 # (a point where the decoder has nothing buffered, so seek()
1636 # can safely start from there and advance to this location).
Amaury Forgeot d'Arc7684f852008-05-03 12:21:13 +00001637 for next_byte in next_input:
Christian Heimes1a6387e2008-03-26 12:49:49 +00001638 bytes_fed += 1
1639 chars_decoded += len(decoder.decode(next_byte))
1640 dec_buffer, dec_flags = decoder.getstate()
1641 if not dec_buffer and chars_decoded <= chars_to_skip:
1642 # Decoder buffer is empty, so this is a safe start point.
1643 start_pos += bytes_fed
1644 chars_to_skip -= chars_decoded
1645 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1646 if chars_decoded >= chars_to_skip:
1647 break
1648 else:
1649 # We didn't get enough decoded data; signal EOF to get more.
1650 chars_decoded += len(decoder.decode(b'', final=True))
1651 need_eof = 1
1652 if chars_decoded < chars_to_skip:
1653 raise IOError("can't reconstruct logical file position")
1654
1655 # The returned cookie corresponds to the last safe start point.
1656 return self._pack_cookie(
1657 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1658 finally:
1659 decoder.setstate(saved_state)
1660
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +00001661 def truncate(self, pos=None):
1662 self.flush()
1663 if pos is None:
1664 pos = self.tell()
1665 self.seek(pos)
1666 return self.buffer.truncate()
1667
Christian Heimes1a6387e2008-03-26 12:49:49 +00001668 def seek(self, cookie, whence=0):
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +00001669 if self.closed:
1670 raise ValueError("tell on closed file")
Christian Heimes1a6387e2008-03-26 12:49:49 +00001671 if not self._seekable:
1672 raise IOError("underlying stream is not seekable")
1673 if whence == 1: # seek relative to current position
1674 if cookie != 0:
1675 raise IOError("can't do nonzero cur-relative seeks")
1676 # Seeking to the current position should attempt to
1677 # sync the underlying buffer with the current position.
1678 whence = 0
1679 cookie = self.tell()
1680 if whence == 2: # seek relative to end of file
1681 if cookie != 0:
1682 raise IOError("can't do nonzero end-relative seeks")
1683 self.flush()
1684 position = self.buffer.seek(0, 2)
1685 self._set_decoded_chars('')
1686 self._snapshot = None
1687 if self._decoder:
1688 self._decoder.reset()
1689 return position
1690 if whence != 0:
1691 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1692 (whence,))
1693 if cookie < 0:
1694 raise ValueError("negative seek position %r" % (cookie,))
1695 self.flush()
1696
1697 # The strategy of seek() is to go back to the safe start point
1698 # and replay the effect of read(chars_to_skip) from there.
1699 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1700 self._unpack_cookie(cookie)
1701
1702 # Seek back to the safe start point.
1703 self.buffer.seek(start_pos)
1704 self._set_decoded_chars('')
1705 self._snapshot = None
1706
1707 # Restore the decoder to its state from the safe start point.
1708 if self._decoder or dec_flags or chars_to_skip:
1709 self._decoder = self._decoder or self._get_decoder()
1710 self._decoder.setstate((b'', dec_flags))
1711 self._snapshot = (dec_flags, b'')
1712
1713 if chars_to_skip:
1714 # Just like _read_chunk, feed the decoder and save a snapshot.
1715 input_chunk = self.buffer.read(bytes_to_feed)
1716 self._set_decoded_chars(
1717 self._decoder.decode(input_chunk, need_eof))
1718 self._snapshot = (dec_flags, input_chunk)
1719
1720 # Skip chars_to_skip of the decoded characters.
1721 if len(self._decoded_chars) < chars_to_skip:
1722 raise IOError("can't restore logical file position")
1723 self._decoded_chars_used = chars_to_skip
1724
1725 return cookie
1726
1727 def read(self, n=None):
1728 if n is None:
1729 n = -1
1730 decoder = self._decoder or self._get_decoder()
1731 if n < 0:
1732 # Read everything.
1733 result = (self._get_decoded_chars() +
1734 decoder.decode(self.buffer.read(), final=True))
1735 self._set_decoded_chars('')
1736 self._snapshot = None
1737 return result
1738 else:
1739 # Keep reading chunks until we have n characters to return.
1740 eof = False
1741 result = self._get_decoded_chars(n)
1742 while len(result) < n and not eof:
1743 eof = not self._read_chunk()
1744 result += self._get_decoded_chars(n - len(result))
1745 return result
1746
1747 def next(self):
1748 self._telling = False
1749 line = self.readline()
1750 if not line:
1751 self._snapshot = None
1752 self._telling = self._seekable
1753 raise StopIteration
1754 return line
1755
1756 def readline(self, limit=None):
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +00001757 if self.closed:
1758 raise ValueError("read from closed file")
Christian Heimes1a6387e2008-03-26 12:49:49 +00001759 if limit is None:
1760 limit = -1
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +00001761 if not isinstance(limit, (int, long)):
1762 raise TypeError("limit must be an integer")
Christian Heimes1a6387e2008-03-26 12:49:49 +00001763
1764 # Grab all the decoded text (we will rewind any extra bits later).
1765 line = self._get_decoded_chars()
1766
1767 start = 0
1768 decoder = self._decoder or self._get_decoder()
1769
1770 pos = endpos = None
1771 while True:
1772 if self._readtranslate:
1773 # Newlines are already translated, only search for \n
1774 pos = line.find('\n', start)
1775 if pos >= 0:
1776 endpos = pos + 1
1777 break
1778 else:
1779 start = len(line)
1780
1781 elif self._readuniversal:
1782 # Universal newline search. Find any of \r, \r\n, \n
1783 # The decoder ensures that \r\n are not split in two pieces
1784
1785 # In C we'd look for these in parallel of course.
1786 nlpos = line.find("\n", start)
1787 crpos = line.find("\r", start)
1788 if crpos == -1:
1789 if nlpos == -1:
1790 # Nothing found
1791 start = len(line)
1792 else:
1793 # Found \n
1794 endpos = nlpos + 1
1795 break
1796 elif nlpos == -1:
1797 # Found lone \r
1798 endpos = crpos + 1
1799 break
1800 elif nlpos < crpos:
1801 # Found \n
1802 endpos = nlpos + 1
1803 break
1804 elif nlpos == crpos + 1:
1805 # Found \r\n
1806 endpos = crpos + 2
1807 break
1808 else:
1809 # Found \r
1810 endpos = crpos + 1
1811 break
1812 else:
1813 # non-universal
1814 pos = line.find(self._readnl)
1815 if pos >= 0:
1816 endpos = pos + len(self._readnl)
1817 break
1818
1819 if limit >= 0 and len(line) >= limit:
1820 endpos = limit # reached length limit
1821 break
1822
1823 # No line ending seen yet - get more data
1824 more_line = ''
1825 while self._read_chunk():
1826 if self._decoded_chars:
1827 break
1828 if self._decoded_chars:
1829 line += self._get_decoded_chars()
1830 else:
1831 # end of file
1832 self._set_decoded_chars('')
1833 self._snapshot = None
1834 return line
1835
1836 if limit >= 0 and endpos > limit:
1837 endpos = limit # don't exceed limit
1838
1839 # Rewind _decoded_chars to just after the line ending we found.
1840 self._rewind_decoded_chars(len(line) - endpos)
1841 return line[:endpos]
1842
1843 @property
1844 def newlines(self):
1845 return self._decoder.newlines if self._decoder else None
1846
class StringIO(TextIOWrapper):

    """An in-memory stream for text.

    The initial_value argument sets the initial contents of the stream.
    The other arguments are like those of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        # The text is stored encoded in a private BytesIO buffer.
        backing = BytesIO()
        super(StringIO, self).__init__(backing,
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, unicode):
            text = unicode(text)
        # Write the initial contents, then rewind so reads start at the
        # beginning.
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Return the entire contents of the stream, decoded to text."""
        self.flush()
        encoded = self.buffer.getvalue()
        return encoded.decode(self._encoding, self._errors)