blob: 35dea411e7cdd5e499e4177eda4fdcc3d72387f4 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Antoine Pitroud843c2d2011-02-25 21:34:39 +000017from errno import EINTR
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
# Fallback buffer size, in bytes. open() uses the underlying file's
# st_blksize whenever it can obtain one, and this value otherwise.
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
21
22# NOTE: Base classes defined here are registered with the "official" ABCs
23# defined in io.py. We don't use real inheritance though, because we don't
24# want to inherit the C implementations.
25
26
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno and strerror, an instance carries
    characters_written, the integer passed by the raiser (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the error.

        Raises TypeError if characters_written is not an int.
        """
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
36
37
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type checks -------------------------------------------
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing ---------------------------------------------------
    modes = set(mode)
    # Reject unknown mode characters as well as repeated ones (the set is
    # shorter than the string exactly when a character occurs twice).
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Build the stack: raw -> buffered -> text -----------------------
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            # Line buffering is handled by the text layer; the binary
            # layer underneath uses the default buffer size.
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # fstat failed or the platform has no st_blksize:
                # keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            return buffer
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        text.mode = mode
        return text
    except BaseException:
        # The raw file is already open at this point; do not leave it
        # dangling when a later step fails. (With closefd=False this
        # leaves the caller's descriptor open, as requested.)
        raw.close()
        raise
221
222
class DocDescriptor:
    """Descriptor producing the docstring for builtins.open.

    Reading the attribute yields open()'s call signature, a blank line,
    and then open's own docstring.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)")
        # Computed on every access, so it always reflects open.__doc__.
        return "\n\n".join((signature, open.__doc__))
231
class OpenWrapper:
    """Callable stand-in for builtins.open.

    Being a class instead of a plain function, it does not turn into a
    bound method when stored as a class variable (as dbm.dumb does).
    "Instantiating" it simply forwards the call to open().

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Hand back whatever open() returns, not an OpenWrapper instance.
        stream = open(*args, **kwargs)
        return stream
244
245
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object: reuse the one exposed by io, and define a local substitute
# only when io has no such attribute.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253
254
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length seek relative to the current position reports
        # where we are without moving.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raises;
                # otherwise every later close() (including the one issued
                # by __del__) would attempt the failing flush again.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use peek() to learn how many bytes can be read in one
                # go without overshooting the end of the line or limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte.
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol: the stream iterates over its own lines."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line; raise StopIteration on an empty line."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); nothing is appended."""
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
520
521
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        # Drop the unused tail of the preallocated buffer.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns None if read() reports that no data is available (returns
        None) before any byte has been read.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if not res and data is None:
            # Nothing was read and the stream would block: do not
            # disguise that as EOF (b"").
            return None
        return bytes(res)

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
581
582
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument and has no default implementation built on readinto().

    read(), readinto() and write() may also raise BlockingIOError when
    the underlying raw stream is in non-blocking mode and not ready;
    unlike the raw versions, they never return None.

    A typical implementation should wrap a RawIOBase implementation
    rather than inherit from one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        With n omitted, None, or negative, read and return all data
        until EOF.

        With a positive n and a raw stream that is not 'interactive',
        several raw reads may be issued to satisfy the byte count
        (unless EOF is reached first). For interactive raw streams
        (XXX and for pipes?), at most one raw read is issued, and a
        short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes (n is an int) with at most one read()
        system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        As with read(), several reads may be issued to the underlying
        raw stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            import array
            if isinstance(b, array.array):
                # array.array only accepts another array in a slice
                # assignment, so wrap the bytes first.
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
670
671
672class _BufferedIOMixin(BufferedIOBase):
673
674 """A mixin implementation of BufferedIOBase with an underlying raw stream.
675
676 This passes most requests on to the underlying raw stream. It
677 does *not* provide implementations of read(), readinto() or
678 write().
679 """
680
681 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000682 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000683
684 ### Positioning ###
685
686 def seek(self, pos, whence=0):
687 new_position = self.raw.seek(pos, whence)
688 if new_position < 0:
689 raise IOError("seek() returned an invalid position")
690 return new_position
691
692 def tell(self):
693 pos = self.raw.tell()
694 if pos < 0:
695 raise IOError("tell() returned an invalid position")
696 return pos
697
698 def truncate(self, pos=None):
699 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
700 # and a flush may be necessary to synch both views of the current
701 # file state.
702 self.flush()
703
704 if pos is None:
705 pos = self.tell()
706 # XXX: Should seek() be used, instead of passing the position
707 # XXX directly to truncate?
708 return self.raw.truncate(pos)
709
710 ### Flush and close ###
711
712 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000713 if self.closed:
714 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715 self.raw.flush()
716
717 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000718 if self.raw is not None and not self.closed:
719 self.flush()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720 self.raw.close()
721
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000722 def detach(self):
723 if self.raw is None:
724 raise ValueError("raw stream already detached")
725 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000726 raw = self._raw
727 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000728 return raw
729
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730 ### Inquiries ###
731
732 def seekable(self):
733 return self.raw.seekable()
734
735 def readable(self):
736 return self.raw.readable()
737
738 def writable(self):
739 return self.raw.writable()
740
741 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000742 def raw(self):
743 return self._raw
744
745 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746 def closed(self):
747 return self.raw.closed
748
749 @property
750 def name(self):
751 return self.raw.name
752
753 @property
754 def mode(self):
755 return self.raw.mode
756
Antoine Pitrou243757e2010-11-05 21:15:39 +0000757 def __getstate__(self):
758 raise TypeError("can not serialize a '{0}' object"
759 .format(self.__class__.__name__))
760
Antoine Pitrou716c4442009-05-23 19:04:03 +0000761 def __repr__(self):
762 clsname = self.__class__.__name__
763 try:
764 name = self.name
765 except AttributeError:
766 return "<_pyio.{0}>".format(clsname)
767 else:
768 return "<_pyio.{0} name={1!r}>".format(clsname, name)
769
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000770 ### Lower-level APIs ###
771
772 def fileno(self):
773 return self.raw.fileno()
774
def isatty(self):
    """Return the raw stream's isatty()."""
    return self.raw.isatty()
777
778
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0 regardless of the initial contents.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like every other
        accessor of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        n of None or a negative value means "everything up to the end".
        Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the byte count.

        Raises ValueError on a closed stream and TypeError for str input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position, 2 is relative to the end; the
        relative forms clamp the result at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onward.

        Returns pos.  The stream position itself is not changed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
891
892
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def _raw_read_retrying(self, *args):
        """Call self.raw.read(*args), retrying while it fails with EINTR."""
        while True:
            try:
                return self.raw.read(*args)
            except IOError as e:
                if e.errno != EINTR:
                    raise

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold self._read_lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self._raw_read_retrying()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self._raw_read_retrying(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold self._read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self._raw_read_retrying(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread read-ahead."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the new position.

        For whence == 1 the offset is first corrected by the amount of
        buffered data not yet consumed.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1043
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # Stack level passed to warnings.warn() for the deprecation warning.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream.

        max_buffer_size is accepted only for backward compatibility; any
        value other than None triggers a DeprecationWarning and is
        otherwise ignored.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError on a closed stream, TypeError for str input,
        and BlockingIOError (with characters_written set) when the raw
        stream would block and the buffer cannot take all of b.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold self._write_lock.

        Raises BlockingIOError, with characters_written set to the number
        of bytes flushed by this call, when the raw stream would block.
        """
        # Imported locally so this method does not depend on the
        # module-level errno import list.
        from errno import EAGAIN
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                try:
                    n = self.raw.write(self._write_buf)
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                if n is None:
                    # A non-blocking raw stream returns None when nothing
                    # could be written; report that as "would block"
                    # instead of failing on the comparison below.
                    raise BlockingIOError(
                        EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus unflushed bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1144
1145
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  max_buffer_size is
        deprecated: a non-None value only triggers a DeprecationWarning.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises, so an
        error on the write side cannot leak the read side; the writer's
        error still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the writer side."""
        return self.writer.closed
1216
1217
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # One frame deeper than BufferedWriter, whose __init__ issues the
    # deprecation warning on our behalf.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the reading and the writing half on the same raw stream."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def _rewind_raw_over_readahead(self):
        """Move the raw stream back over unread buffered bytes.

        The caller must hold self._read_lock.
        """
        unread_offset = self._read_pos - len(self._read_buf)
        self.raw.seek(unread_offset, 1)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, seek the raw stream, return the position."""
        if not 0 <= whence <= 2:
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            with self._read_lock:
                self._rewind_raw_over_readahead()
        # The raw seek comes before clearing the read buffer so that a
        # failing seek does not lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Report the position from the writer's view if data is pending, else the reader's."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read; None means read everything."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then do a read1."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then hand b to the writer half."""
        if self._read_buf:
            with self._read_lock:
                self._rewind_raw_over_readahead()
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1289
1290
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.

    Every method below only calls self._unsupported() with its own name;
    subclasses supply the real implementations.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None
1355
# Register with the "official" ABC from io.py rather than inheriting from it.
io.TextIOBase.register(TextIOBase)
1357
1358
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Newline-aware wrapper around another incremental decoder.

    Used when reading in universal newlines mode: with translate=True,
    \r\n and \r are turned into \n.  The kinds of newline seen are
    recorded either way, and a trailing \r is held back until the next
    call so that a \r\n pair is always returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    # Value of the newlines property, indexed by the seennl bit mask.
    _SEEN_TO_NEWLINES = (
        None,
        "\n",
        "\r",
        ("\r", "\n"),
        "\r\n",
        ("\n", "\r\n"),
        ("\r", "\r\n"),
        ("\r", "\n", "\r\n"),
    )

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, track newline kinds, and optionally translate them."""
        # With no wrapped decoder the input is taken as already decoded.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Re-attach the \r held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, inner_flag = b"", 0
        else:
            buf, inner_flag = self.decoder.getstate()
        flag = inner_flag << 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and the pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a str, or a tuple of str."""
        return self._SEEN_TO_NEWLINES[self.seennl]
1442
1443
1444class TextIOWrapper(TextIOBase):
1445
1446 r"""Character and line based layer over a BufferedIOBase object, buffer.
1447
1448 encoding gives the name of the encoding that the stream will be
1449 decoded or encoded with. It defaults to locale.getpreferredencoding.
1450
1451 errors determines the strictness of encoding and decoding (see the
1452 codecs.register) and defaults to "strict".
1453
1454 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1455 handling of line endings. If it is None, universal newlines is
1456 enabled. With this enabled, on input, the lines endings '\n', '\r',
1457 or '\r\n' are translated to '\n' before being returned to the
1458 caller. Conversely, on output, '\n' is translated to the system
1459 default line separator, os.linesep. If newline is any other of its
1460 legal values, that newline becomes the newline when the file is read
1461 and it is returned untranslated. On output, '\n' is converted to the
1462 newline.
1463
1464 If line_buffering is True, a call to flush is implied when a call to
1465 write contains a newline character.
1466 """
1467
1468 _CHUNK_SIZE = 2048
1469
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False):
    """Wrap the binary stream buffer in a text layer.

    newline must be None, '', '\\n', '\\r' or '\\r\\n' (TypeError for a
    non-str, ValueError for any other value).  When encoding is None it
    is taken from os.device_encoding() of the buffer's file descriptor,
    then from locale.getpreferredencoding(), with "ascii" as the last
    resort if locale cannot be imported.  errors defaults to "strict".
    """
    if newline is not None and not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            # The buffer has no usable file descriptor.
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    else:
        if not isinstance(errors, str):
            raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    # newline of None or '' selects universal-newline reading; only None
    # also translates what is read.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    # Every newline value except '' translates '\n' on output.
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()

    if self._seekable and self.writable():
        position = self.buffer.tell()
        if position != 0:
            # Not at the start of the stream: put the encoder into state 0.
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
1523
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001524 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1525 # where dec_flags is the second (integer) item of the decoder state
1526 # and next_input is the chunk of input bytes that comes next after the
1527 # snapshot point. We use this to reconstruct decoder states in tell().
1528
1529 # Naming convention:
1530 # - "bytes_..." for integer variables that count input bytes
1531 # - "chars_..." for integer variables that count decoded characters
1532
def __repr__(self):
    """Return '<_pyio.TextIOWrapper [name=..] [mode=..] encoding=..>'.

    The name and mode parts are left out when the attribute lookup
    raises AttributeError.
    """
    pieces = ["<_pyio.TextIOWrapper"]
    optional = (("name", " name={0!r}"), ("mode", " mode={0!r}"))
    for attr, template in optional:
        try:
            value = getattr(self, attr)
        except AttributeError:
            continue
        pieces.append(template.format(value))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001548
@property
def encoding(self):
    """The encoding name stored in self._encoding."""
    return self._encoding
1552
@property
def errors(self):
    """The error-handling setting stored in self._errors."""
    return self._errors
1556
@property
def line_buffering(self):
    """The line-buffering flag stored in self._line_buffering."""
    return self._line_buffering
1560
@property
def buffer(self):
    """The wrapped binary stream (read-only view of self._buffer)."""
    return self._buffer
1564
def seekable(self):
    """Return the cached seekable flag stored in self._seekable."""
    return self._seekable
1567
def readable(self):
    """Return whatever the buffer's readable() reports."""
    return self.buffer.readable()
1570
def writable(self):
    """Return whatever the buffer's writable() reports."""
    return self.buffer.writable()
1573
def flush(self):
    """Flush the buffer and set _telling back to the value of _seekable."""
    self.buffer.flush()
    self._telling = self._seekable
1577
def close(self):
    """Flush pending text and close the underlying buffer.

    Does nothing when the buffer is None or already closed.  The buffer
    is closed even when flush() raises, so a failing flush cannot leak
    it; the flush error still propagates to the caller.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001582
@property
def closed(self):
    """The buffer's own closed attribute."""
    return self.buffer.closed
1586
@property
def name(self):
    """The buffer's name attribute."""
    return self.buffer.name
1590
def fileno(self):
    """Return the buffer's fileno()."""
    return self.buffer.fileno()
1593
def isatty(self):
    """Return the buffer's isatty()."""
    return self.buffer.isatty()
1596
def write(self, s):
    """Encode the str s, write it to the buffer, and return len(s).

    When output translation is on and the configured newline is not
    "\\n", each "\\n" is replaced by it before encoding.  With line
    buffering, a newline or "\\r" in the data triggers a flush.  Writing
    drops the tell() snapshot and resets the decoder.

    Raises ValueError on a closed stream and TypeError for non-str input.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    translating = self._writetranslate
    line_buffered = self._line_buffering
    has_newline = (translating or line_buffered) and "\n" in s
    if has_newline and translating and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    self.buffer.write(encoder.encode(s))
    if line_buffered and (has_newline or "\r" in s):
        self.flush()
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
1618
def _get_encoder(self):
    """Build an incremental encoder for self._encoding, cache and return it."""
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = encoder_factory(self._errors)
    self._encoder = encoder
    return encoder
1623
def _get_decoder(self):
    """Build an incremental decoder for self._encoding, cache and return it.

    In universal-newline mode the decoder is wrapped in an
    IncrementalNewlineDecoder.
    """
    decoder = codecs.getincrementaldecoder(self._encoding)(self._errors)
    if self._readuniversal:
        decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
    self._decoder = decoder
    return decoder
1631
1632 # The following three methods implement an ADT for _decoded_chars.
1633 # Text returned from the decoder is buffered here until the client
1634 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Set the _decoded_chars buffer."""
    self._decoded_chars = chars
    # Nothing of the new text has been handed out yet.
    self._decoded_chars_used = 0
1639
def _get_decoded_chars(self, n=None):
    """Return up to n unread decoded characters and mark them as used.

    With n of None, everything that is still unread is returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    chars = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(chars)
    return chars
1649
def _rewind_decoded_chars(self, n):
    """Give back the last n characters taken from the _decoded_chars buffer.

    Raises AssertionError when fewer than n characters have been used.
    """
    used = self._decoded_chars_used
    if n > used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = used - n
1655
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the BufferedReader.

    Returns True unless EOF was reached.  Raises ValueError when no
    decoder has been set up.
    """

    # The return value is True unless EOF was reached.  The decoded
    # string is placed in self._decoded_chars (replacing its previous
    # value).  The entire input chunk is sent to the decoder, though
    # some of it may remain buffered in the decoder, yet to be
    # converted.

    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.

        dec_buffer, dec_flags = self._decoder.getstate()
        # Given this, we know there was a valid snapshot point
        # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

    # Read a chunk, decode it, and put the result in self._decoded_chars.
    input_chunk = self.buffer.read1(self._CHUNK_SIZE)
    # An empty chunk is treated as EOF and passed to the decoder as the
    # "final" flag.
    eof = not input_chunk
    self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

    if self._telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not eof
1689
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the components of a tell() cookie into one integer.

    The meaning of a cookie is: seek to position, set the decoder flags
    to dec_flags, read bytes_to_feed bytes, feed them into the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters of
    the decoded result.  Each field is placed 64 bits above the previous
    one, with need_eof at bit 256.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1699
def _unpack_cookie(self, bigint):
    """Split a tell() cookie back into its five components.

    Returns (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip).
    """
    field_size = 1 << 64
    remaining, position = divmod(bigint, field_size)
    remaining, dec_flags = divmod(remaining, field_size)
    remaining, bytes_to_feed = divmod(remaining, field_size)
    need_eof, chars_to_skip = divmod(remaining, field_size)
    return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1706
def tell(self):
    """Return a cookie describing the current position in the text stream.

    When no decoder or snapshot exists, the cookie is simply the byte
    position of the underlying buffer.  Otherwise it is built with
    _pack_cookie() from the nearest point, at or before the current
    location, where the decoder had nothing buffered.

    Raises UnsupportedOperation if the stream is not seekable, and
    IOError if telling is disabled or the position cannot be
    reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    byte_pos = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return byte_pos

    # Step back to the snapshot point recorded by _read_chunk().
    snap_flags, snap_bytes = self._snapshot
    byte_pos -= len(snap_bytes)

    # Number of decoded characters consumed since the snapshot.
    remaining = self._decoded_chars_used
    if remaining == 0:
        # Still exactly at the snapshot point.
        return self._pack_cookie(byte_pos, snap_flags)

    # Replay the snapshot bytes through the decoder until it has
    # produced as many characters as have been consumed.
    original_state = decoder.getstate()
    try:
        decoder.setstate((b'', snap_flags))
        safe_pos = byte_pos
        safe_flags = snap_flags
        fed = 0
        produced = 0
        hit_eof = 0

        # Feed one byte at a time, remembering the latest "safe start
        # point": a place where the decoder buffer is empty, so seek()
        # can restart from there and advance to the current location.
        one_byte = bytearray(1)
        for value in snap_bytes:
            one_byte[0] = value
            fed += 1
            produced += len(decoder.decode(one_byte))
            pending, flags_now = decoder.getstate()
            if not pending and produced <= remaining:
                # Empty decoder buffer: record a new safe start point.
                safe_pos += fed
                remaining -= produced
                safe_flags, fed, produced = flags_now, 0, 0
            if produced >= remaining:
                break
        else:
            # Ran out of bytes; signal EOF to the decoder to get the rest.
            produced += len(decoder.decode(b'', final=True))
            hit_eof = 1
            if produced < remaining:
                raise IOError("can't reconstruct logical file position")

        # The cookie refers to the last safe start point.
        return self._pack_cookie(
            safe_pos, safe_flags, fed, hit_eof, remaining)
    finally:
        decoder.setstate(original_state)
1769
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer.

    When pos is None the value of self.tell() is used.  Returns whatever
    self.buffer.truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001775
def detach(self):
    """Flush, then disconnect and return the underlying buffer.

    Afterwards self._buffer is None.  Raises ValueError if the buffer
    has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1783
def seek(self, cookie, whence=0):
    """Reposition the text stream and return the new position.

    cookie is a value previously returned by tell() (or 0).  whence may
    be 0 (absolute), 1 (current position; only a zero cookie is
    accepted) or 2 (end of file; only a zero cookie is accepted).

    Raises ValueError if the file is closed, whence is invalid or cookie
    is negative; UnsupportedOperation if the stream is not seekable or a
    non-zero relative seek is requested; IOError if the position encoded
    in the cookie cannot be restored.
    """
    def _reset_encoder(position):
        # Bring the encoder in line with the new position (merely useful
        # for proper BOM handling): a full reset at the start of the
        # file, state 0 anywhere else.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        # Without this, a write following a seek to the end of a
        # non-empty file could start with a fresh encoder and emit a BOM
        # in the middle of the data.
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder for the new absolute position.
    _reset_encoder(cookie)
    return cookie
1855
def read(self, n=None):
    """Read and return at most n characters as a string.

    With n None or negative, everything up to end of file is read and
    the snapshot is cleared.  Raises TypeError when n has no __index__
    attribute.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err

    if n < 0:
        # Read everything: pending decoded text first, then the rest of
        # the underlying buffer decoded in one final call.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        text = pending + remainder
        self._set_decoded_chars('')
        self._snapshot = None
        return text

    # Keep pulling chunks until n characters are available or EOF is hit.
    text = self._get_decoded_chars(n)
    more = True
    while len(text) < n and more:
        more = self._read_chunk()
        text += self._get_decoded_chars(n - len(text))
    return text
1880
1881 def __next__(self):
1882 self._telling = False
1883 line = self.readline()
1884 if not line:
1885 self._snapshot = None
1886 self._telling = self._seekable
1887 raise StopIteration
1888 return line
1889
def readline(self, limit=None):
    """Read and return one line, including its line ending if present.

    limit, when non-negative, caps the number of characters returned;
    None means no cap.  Raises ValueError if the file is closed and
    TypeError if limit is neither None nor an int.  Returns the
    remaining text (possibly empty) when end of file is reached before
    a line ending.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Take all pending decoded text; any surplus is rewound at the end.
    text = self._get_decoded_chars()

    scan_from = 0
    # Create the decoder on first use.
    if not self._decoder:
        self._get_decoder()

    hit = cut = None
    while True:
        if self._readtranslate:
            # Newlines were already translated, so only '\n' matters.
            hit = text.find('\n', scan_from)
            if hit >= 0:
                cut = hit + 1
                break
            scan_from = len(text)

        elif self._readuniversal:
            # Universal newlines: accept any of \r, \r\n, \n.  The
            # decoder ensures that \r\n is never split in two pieces.
            nl = text.find("\n", scan_from)
            cr = text.find("\r", scan_from)
            if nl == -1 and cr == -1:
                # Nothing found in the text seen so far.
                scan_from = len(text)
            elif cr == -1 or 0 <= nl < cr:
                # A '\n' comes first.
                cut = nl + 1
                break
            elif nl == cr + 1:
                # '\r\n' pair.
                cut = cr + 2
                break
            else:
                # Lone '\r'.
                cut = cr + 1
                break

        else:
            # A specific, non-universal line terminator.
            hit = text.find(self._readnl)
            if hit >= 0:
                cut = hit + len(self._readnl)
                break

        if limit >= 0 and len(text) >= limit:
            cut = limit  # reached length limit
            break

        # No line ending seen yet: fetch chunks until one yields text.
        while self._read_chunk() and not self._decoded_chars:
            pass
        if not self._decoded_chars:
            # End of file.
            self._set_decoded_chars('')
            self._snapshot = None
            return text
        text += self._get_decoded_chars()

    if limit >= 0 and cut > limit:
        cut = limit  # don't exceed limit

    # Give back everything after the point where the line ends.
    self._rewind_decoded_chars(len(text) - cut)
    return text[:cut]
1977
@property
def newlines(self):
    """The decoder's newlines attribute, or None when there is no decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
1981
1982
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        # The byte encoding is purely an implementation detail, so use
        # "surrogatepass" to let every str value (including lone
        # surrogates) round-trip through the underlying BytesIO.
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="surrogatepass",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a string."""
        self.flush()
        # Decode through the stream's own decoder so the result gets the
        # same newline handling as read(); the decoder state is saved and
        # restored so the current read position is left undisturbed.
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")