blob: 265edab1134548c33589112c071beda38b7384c0 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Antoine Pitrou707ce822011-02-25 21:24:11 +000017from errno import EINTR
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
19# open() uses st_blksize whenever we can
20DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
21
22# NOTE: Base classes defined here are registered with the "official" ABCs
23# defined in io.py. We don't use real inheritance though, because we don't
24# want to inherit the C implementations.
25
26
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno/strerror pair, the exception records
    characters_written, an int supplied by the raiser (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the exception.

        errno and strerror are forwarded unchanged to IOError.
        characters_written must be an int; anything else raises TypeError.
        """
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
36
37
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines mode works (it
    only applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.
    It works as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor
    will be kept open when the file is closed. This does not work when a
    file name is given and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    inconsistent mode/buffering/encoding combinations.  If anything fails
    after the underlying file has been opened, the file is closed again
    before the exception propagates.
    """
    # ---- Argument type validation (nothing is opened yet) ----
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # ---- Mode parsing ----
    modes = set(mode)
    # Reject unknown characters and repeated characters ("rr").
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # ---- Build the layer stack: raw -> buffered -> text ----
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    # `result` always names the outermost layer built so far, so that the
    # error path below can close the whole stack with a single call.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            # Prefer the device's block size when the platform reports one.
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Do not leak the already-opened file when a later step fails.
        result.close()
        raise
221
222
class DocDescriptor:
    """Descriptor producing the docstring shown for builtins.open.

    Reading the attribute yields the call signature of open(), a blank
    line, and then the docstring of the open() function in scope.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
231
class OpenWrapper:
    """Class-shaped stand-in for builtins.open.

    A plain function stored as a class variable becomes a bound method
    when looked up through an instance; a class does not.  Wrapping open()
    this way keeps it callable unchanged when stored like that (as
    dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the docstring above with signature + open.__doc__ on access.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **named):
        # "Instantiating" the wrapper simply forwards to open().
        return open(*positional, **named)
244
245
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000246# In normal operation, both `UnsupportedOperation`s should be bound to the
247# same object.
# Reuse the io module's exception class when it has one; otherwise define
# an equivalent local class.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253
254
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations.

        name is the method name reported in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length relative seek reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        The base implementation only checks that the stream is not closed
        (raising ValueError if it is).
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it
    # by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  The
        object is marked closed even when flush() raises, so a failing
        flush is reported once rather than on every later close().
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.  The bare except is
        # deliberate: this is best-effort cleanup.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise
        UnsupportedOperation.  This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable.

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable.

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable.

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed.

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self (an instance of
        IOBase); raises ValueError if the stream is already closed."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None is treated as -1 (no limit), and any
        other non-int raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")

        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in one call, without overshooting the limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() we cannot look ahead: one byte at a time.
                return 1

        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as the line iterator; ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.  Raises ValueError if closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
520
521
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass only has to
    # supply readinto() to get a working read().  readinto() is in general
    # the more efficient of the two.

    # (The reverse -- a default readinto() built on read() -- is not
    # provided on purpose: a subclass implementing neither would then
    # recurse forever.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        With n omitted, None or negative, read until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None or n < 0:
            return self.readall()
        scratch = bytearray(n.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Drop the unused tail of the scratch buffer.
        del scratch[count:]
        return bytes(scratch)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # Nothing collected: hand back the last read() result itself,
        # which is b'' or None.
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation raises UnsupportedOperation.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
585
586
587class BufferedIOBase(IOBase):
588
589 """Base class for buffered IO objects.
590
591 The main difference with RawIOBase is that the read() method
592 supports omitting the size argument, and does not have a default
593 implementation that defers to readinto().
594
595 In addition, read(), readinto() and write() may raise
596 BlockingIOError if the underlying raw stream is in non-blocking
597 mode and not ready; unlike their raw counterparts, they will never
598 return None.
599
600 A typical implementation should not inherit from a RawIOBase
601 implementation, but wrap one.
602 """
603
Georg Brandl4d73b572011-01-13 07:13:06 +0000604 def read(self, n=None):
Raymond Hettingercbb80892011-01-13 18:15:51 +0000605 """Read and return up to n bytes, where n is an int.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606
607 If the argument is omitted, None, or negative, reads and
608 returns all data until EOF.
609
610 If the argument is positive, and the underlying raw stream is
611 not 'interactive', multiple raw reads may be issued to satisfy
612 the byte count (unless EOF is reached first). But for
613 interactive raw streams (XXX and for pipes?), at most one raw
614 read will be issued, and a short result does not imply that
615 EOF is imminent.
616
617 Returns an empty bytes array on EOF.
618
619 Raises BlockingIOError if the underlying raw stream has no
620 data at the moment.
621 """
622 self._unsupported("read")
623
Georg Brandl4d73b572011-01-13 07:13:06 +0000624 def read1(self, n=None):
Raymond Hettingercbb80892011-01-13 18:15:51 +0000625 """Read up to n bytes with at most one read() system call,
626 where n is an int.
627 """
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000628 self._unsupported("read1")
629
Raymond Hettinger3c940242011-01-12 23:39:31 +0000630 def readinto(self, b):
Raymond Hettingercbb80892011-01-13 18:15:51 +0000631 """Read up to len(b) bytes into bytearray b.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000632
633 Like read(), this may issue multiple reads to the underlying raw
634 stream, unless the latter is 'interactive'.
635
Raymond Hettingercbb80892011-01-13 18:15:51 +0000636 Returns an int representing the number of bytes read (0 for EOF).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637
638 Raises BlockingIOError if the underlying raw stream has no
639 data at the moment.
640 """
641 # XXX This ought to work with anything that supports the buffer API
642 data = self.read(len(b))
643 n = len(data)
644 try:
645 b[:n] = data
646 except TypeError as err:
647 import array
648 if not isinstance(b, array.array):
649 raise err
650 b[:n] = array.array('b', data)
651 return n
652
Raymond Hettinger3c940242011-01-12 23:39:31 +0000653 def write(self, b):
Raymond Hettingercbb80892011-01-13 18:15:51 +0000654 """Write the given bytes buffer to the IO stream.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000655
656 Return the number of bytes written, which is never less than
657 len(b).
658
659 Raises BlockingIOError if the buffer is full and the
660 underlying raw stream cannot accept more data at the moment.
661 """
662 self._unsupported("write")
663
Raymond Hettinger3c940242011-01-12 23:39:31 +0000664 def detach(self):
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000665 """
666 Separate the underlying raw stream from the buffer and return it.
667
668 After the raw stream has been detached, the buffer is in an unusable
669 state.
670 """
671 self._unsupported("detach")
672
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000673io.BufferedIOBase.register(BufferedIOBase)
674
675
676class _BufferedIOMixin(BufferedIOBase):
677
678 """A mixin implementation of BufferedIOBase with an underlying raw stream.
679
680 This passes most requests on to the underlying raw stream. It
681 does *not* provide implementations of read(), readinto() or
682 write().
683 """
684
685 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000686 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000687
688 ### Positioning ###
689
690 def seek(self, pos, whence=0):
691 new_position = self.raw.seek(pos, whence)
692 if new_position < 0:
693 raise IOError("seek() returned an invalid position")
694 return new_position
695
696 def tell(self):
697 pos = self.raw.tell()
698 if pos < 0:
699 raise IOError("tell() returned an invalid position")
700 return pos
701
702 def truncate(self, pos=None):
703 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
704 # and a flush may be necessary to synch both views of the current
705 # file state.
706 self.flush()
707
708 if pos is None:
709 pos = self.tell()
710 # XXX: Should seek() be used, instead of passing the position
711 # XXX directly to truncate?
712 return self.raw.truncate(pos)
713
714 ### Flush and close ###
715
716 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000717 if self.closed:
718 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719 self.raw.flush()
720
721 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000722 if self.raw is not None and not self.closed:
723 self.flush()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724 self.raw.close()
725
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000726 def detach(self):
727 if self.raw is None:
728 raise ValueError("raw stream already detached")
729 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000730 raw = self._raw
731 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000732 return raw
733
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734 ### Inquiries ###
735
736 def seekable(self):
737 return self.raw.seekable()
738
739 def readable(self):
740 return self.raw.readable()
741
742 def writable(self):
743 return self.raw.writable()
744
745 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000746 def raw(self):
747 return self._raw
748
749 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750 def closed(self):
751 return self.raw.closed
752
753 @property
754 def name(self):
755 return self.raw.name
756
757 @property
758 def mode(self):
759 return self.raw.mode
760
Antoine Pitrou243757e2010-11-05 21:15:39 +0000761 def __getstate__(self):
762 raise TypeError("can not serialize a '{0}' object"
763 .format(self.__class__.__name__))
764
Antoine Pitrou716c4442009-05-23 19:04:03 +0000765 def __repr__(self):
766 clsname = self.__class__.__name__
767 try:
768 name = self.name
769 except AttributeError:
770 return "<_pyio.{0}>".format(clsname)
771 else:
772 return "<_pyio.{0} name={1!r}>".format(clsname, name)
773
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774 ### Lower-level APIs ###
775
def fileno(self):
    """Return the raw stream's file descriptor."""
    raw = self.raw
    return raw.fileno()
778
def isatty(self):
    """Return whatever the raw stream's isatty() reports."""
    raw = self.raw
    return raw.isatty()
781
782
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    @staticmethod
    def _as_index(pos):
        """Convert pos to an int through __index__, or raise TypeError."""
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        return pos_index()

    def __getstate__(self):
        """Return a copy of the instance dict for pickling.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        A value of None or a negative n reads up to the end of the buffer.
        Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n=-1):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the bytes written.

        The count is the number of *bytes* in b, so buffers whose items
        are wider than one byte (e.g. array('H')) advance the position
        correctly.  Writing past the end pads the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        try:
            # len(b) counts items, not bytes, for multi-byte buffers.
            with memoryview(b) as view:
                n = view.nbytes
        except TypeError:
            # Not a buffer (e.g. a list of ints): keep the old behaviour.
            n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new absolute position.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end).  A negative absolute position raises ValueError for whence 0
        and is clamped to 0 for whence 1 and 2.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        pos = self._as_index(pos)
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position); return pos.

        The stream position itself is left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            pos = self._as_index(pos)
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
895
896
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_pos = 0
        self._read_buf = b""

    def _raw_read(self, *args):
        """Call raw.read(*args), retrying for as long as it fails with EINTR."""
        while True:
            try:
                return self.raw.read(*args)
            except IOError as e:
                if e.errno != EINTR:
                    raise

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock."""
        empty_values = (b"", None)
        nodata_val = b""
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            leftover = buf[pos:]  # Strip the consumed bytes.
            if hasattr(self.raw, 'readall'):
                tail = self.raw.readall()
                if tail is None:
                    return leftover or None
                return leftover + tail
            pieces = [leftover]
            while True:
                # Read until EOF or until read() would block.
                piece = self._raw_read()
                if piece in empty_values:
                    nodata_val = piece
                    break
                pieces.append(piece)
            return b"".join(pieces) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        pieces = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            piece = self._raw_read(wanted)
            if piece in empty_values:
                nodata_val = piece
                break
            avail += len(piece)
            pieces.append(piece)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(pieces)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        if not out:
            return nodata_val
        return out[:n]

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            fresh = self._raw_read(self.buffer_size - have)
            if fresh:
                self._read_buf = self._read_buf[self._read_pos:] + fresh
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            buffered = len(self._read_buf) - self._read_pos
            return self._read_unlocked(min(n, buffered))

    def tell(self):
        """Return the logical position: the raw position minus the
        read-ahead bytes that have not been consumed yet."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, dropping the read-ahead buffer.

        For a relative seek (whence == 1) the offset is first corrected
        by the amount of unconsumed read-ahead.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1053
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel passed to warnings.warn() for the max_buffer_size
    # deprecation warning issued by __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream.

        max_buffer_size is accepted only for backward compatibility: it is
        otherwise unused and passing it emits a DeprecationWarning.
        Raises IOError if raw is not writable and ValueError if
        buffer_size is not positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        The buffer is flushed to the raw stream once it grows beyond
        buffer_size.  If that flush would block, the buffer is cut back to
        buffer_size and BlockingIOError is raised with characters_written
        set to the number of bytes of b that were kept.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold the write lock.

        Raw writes interrupted by EINTR are retried.  If the raw stream
        cannot take more data without blocking (it raises BlockingIOError
        or returns None), BlockingIOError is raised with
        characters_written set to the bytes written during this call;
        unwritten data stays in the buffer.
        """
        from errno import EAGAIN

        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                try:
                    n = self.raw.write(self._write_buf)
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                if n is None:
                    # A non-blocking raw stream reports "would block" by
                    # returning None; comparing None below would raise
                    # TypeError instead of the documented BlockingIOError.
                    raise BlockingIOError(
                        EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus buffered bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush buffered data, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1154
1155
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  max_buffer_size is
        deprecated; passing it only emits a DeprecationWarning.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even when closing the writer raises, so a
        failing writer cannot leak the reader side; the writer's error
        still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """True once the writer side has been closed."""
        return self.writer.closed
1226
1227
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # One frame deeper than BufferedWriter, because the deprecation
    # warning is raised through this class's __init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the read side and the write side over one raw stream."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead and seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, taken from the write side when
        writes are pending and from the read side otherwise."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos, defaulting to the current logical position."""
        if pos is None:
            pos = self.tell()
        # BufferedWriter.truncate flushes pending writes first.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read; None means read everything."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Rewind the raw stream over unread read-ahead, then buffer b."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1299
1300
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream and return them as a
        string; n is an int.

        Data is read from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n reads until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Always None here; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.
        Always None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Always None here; subclasses should override."""
        return None
1365
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001366io.TextIOBase.register(TextIOBase)
1367
1368
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, record the newline kinds seen and, when translate
        is set, map \\r\\n and \\r to \\n."""
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if text.endswith("\r") and not final:
            text, self.pendingcr = text[:-1], True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = self.seennl
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl = seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags): the wrapped decoder's flags shifted left
        by one bit, with the low bit holding the pending-\\r state."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        return buf, (flag << 1) | (1 if self.pendingcr else 0)

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget the seen newlines and pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        table = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return table[self.seennl]
1452
1453
1454class TextIOWrapper(TextIOBase):
1455
1456 r"""Character and line based layer over a BufferedIOBase object, buffer.
1457
1458 encoding gives the name of the encoding that the stream will be
1459 decoded or encoded with. It defaults to locale.getpreferredencoding.
1460
1461 errors determines the strictness of encoding and decoding (see the
1462 codecs.register) and defaults to "strict".
1463
1464 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1465 handling of line endings. If it is None, universal newlines is
1466 enabled. With this enabled, on input, the lines endings '\n', '\r',
1467 or '\r\n' are translated to '\n' before being returned to the
1468 caller. Conversely, on output, '\n' is translated to the system
1469 default line seperator, os.linesep. If newline is any other of its
1470 legal values, that newline becomes the newline when the file is read
1471 and it is returned untranslated. On output, '\n' is converted to the
1472 newline.
1473
1474 If line_buffering is True, a call to flush is implied when a call to
1475 write contains a newline character.
1476 """
1477
1478 _CHUNK_SIZE = 2048
1479
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False):
    """Validate the arguments and initialise the wrapper's state.

    When encoding is None it is taken from os.device_encoding() on the
    buffer's file descriptor, falling back to
    locale.getpreferredencoding() (or "ascii" if locale cannot be
    imported).  errors defaults to "strict".
    """
    if not (newline is None or isinstance(newline, str)):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
    if encoding is None:
        try:
            import locale
        except ImportError:
            # Importing locale may fail if Python is being built
            encoding = "ascii"
        else:
            encoding = locale.getpreferredencoding()

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    can_seek = self.buffer.seekable()
    self._seekable = can_seek
    self._telling = can_seek
    self._b2cratio = 0.0

    # When wrapping a writable stream that is not at its start, put the
    # encoder into state 0.
    if self._seekable and self.writable() and self.buffer.tell() != 0:
        try:
            self._get_encoder().setstate(0)
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
1534
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001535 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1536 # where dec_flags is the second (integer) item of the decoder state
1537 # and next_input is the chunk of input bytes that comes next after the
1538 # snapshot point. We use this to reconstruct decoder states in tell().
1539
1540 # Naming convention:
1541 # - "bytes_..." for integer variables that count input bytes
1542 # - "chars_..." for integer variables that count decoded characters
1543
def __repr__(self):
    """Return ``<_pyio.TextIOWrapper [name=...] [mode=...] encoding=...>``.

    The name and mode parts are left out when the corresponding
    attribute lookup raises AttributeError.
    """
    parts = ["<_pyio.TextIOWrapper"]
    try:
        stream_name = self.name
    except AttributeError:
        pass
    else:
        parts.append(" name={0!r}".format(stream_name))
    try:
        stream_mode = self.mode
    except AttributeError:
        pass
    else:
        parts.append(" mode={0!r}".format(stream_mode))
    parts.append(" encoding={0!r}>".format(self.encoding))
    return "".join(parts)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001559
@property
def encoding(self):
    """The encoding name stored in ``_encoding``."""
    value = self._encoding
    return value
1563
@property
def errors(self):
    """The error-handling name stored in ``_errors``."""
    value = self._errors
    return value
1567
@property
def line_buffering(self):
    """The line-buffering flag stored in ``_line_buffering``."""
    value = self._line_buffering
    return value
1571
@property
def buffer(self):
    """The wrapped binary buffer (the value stored in ``_buffer``)."""
    wrapped = self._buffer
    return wrapped
1575
def seekable(self):
    """Return the seekability flag cached in ``_seekable``."""
    flag = self._seekable
    return flag
1578
def readable(self):
    """Return whatever the underlying buffer's readable() reports."""
    buf = self.buffer
    return buf.readable()
1581
def writable(self):
    """Return whatever the underlying buffer's writable() reports."""
    buf = self.buffer
    return buf.writable()
1584
def flush(self):
    """Flush the underlying buffer, then set ``_telling`` back to the
    value of ``_seekable``."""
    buf = self.buffer
    buf.flush()
    self._telling = self._seekable
1588
def close(self):
    """Flush pending data, then close the underlying buffer.

    Does nothing when the buffer has been detached or is already closed.
    The buffer is closed even if flush() raises, so a failed flush cannot
    leak the underlying stream; the flush error still propagates.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593
@property
def closed(self):
    """The ``closed`` attribute of the underlying buffer."""
    buf = self.buffer
    return buf.closed
1597
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    buf = self.buffer
    return buf.name
1601
def fileno(self):
    """Return the underlying buffer's file descriptor."""
    buf = self.buffer
    return buf.fileno()
1604
def isatty(self):
    """Return whatever the underlying buffer's isatty() reports."""
    buf = self.buffer
    return buf.isatty()
1607
def write(self, s):
    'Write data, where s is a str'
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only look for "\n" when translation or line buffering needs it.
    haslf = (self._writetranslate or self._line_buffering) and "\n" in s
    if haslf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (haslf or "\r" in s):
        self.flush()
    # Any tell() snapshot and decoder state no longer match the stream.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    # The count is of the characters passed in, before newline translation.
    return char_count
1629
def _get_encoder(self):
    """Create an incremental encoder for ``_encoding`` with the
    ``_errors`` policy, store it in ``_encoder`` and return it."""
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = encoder_factory(self._errors)
    self._encoder = encoder
    return self._encoder
1634
def _get_decoder(self):
    """Create an incremental decoder for ``_encoding``, store it in
    ``_decoder`` and return it.

    When ``_readuniversal`` is set the decoder is wrapped in an
    IncrementalNewlineDecoder, translating according to
    ``_readtranslate``.
    """
    decoder_factory = codecs.getincrementaldecoder(self._encoding)
    new_decoder = decoder_factory(self._errors)
    if self._readuniversal:
        new_decoder = IncrementalNewlineDecoder(new_decoder,
                                                self._readtranslate)
    self._decoder = new_decoder
    return new_decoder
1642
1643 # The following three methods implement an ADT for _decoded_chars.
1644 # Text returned from the decoder is buffered here until the client
1645 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and rewind its read offset."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
1650
def _get_decoded_chars(self, n=None):
    """Consume and return up to n characters from the _decoded_chars
    buffer (everything that is left when n is None)."""
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
1660
def _rewind_decoded_chars(self, n):
    """Move the _decoded_chars read offset back by n characters.

    Raises AssertionError if fewer than n characters have been consumed.
    """
    used = self._decoded_chars_used
    if used < n:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = used - n
1666
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the BufferedReader.
    """

    # Returns True unless EOF was reached.  The decoded text replaces
    # whatever was in self._decoded_chars.  The whole input chunk is
    # handed to the decoder, although part of it may stay buffered
    # inside the decoder, not yet converted.

    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.  Given the
        # state below, there was a valid snapshot point
        # len(pending_bytes) bytes ago with decoder state
        # (b'', snapshot_flags).
        pending_bytes, snapshot_flags = decoder.getstate()

    # Read a chunk, decode it, and put the result in self._decoded_chars.
    raw_chunk = self.buffer.read1(self._CHUNK_SIZE)
    at_eof = not raw_chunk
    text = self._decoder.decode(raw_chunk, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk (0.0 when nothing decoded).
    self._b2cratio = (len(raw_chunk) / len(self._decoded_chars)
                      if text else 0.0)

    if self._telling:
        # At the snapshot point, len(pending_bytes) bytes before the read,
        # the next input to be decoded is pending_bytes + raw_chunk.
        self._snapshot = (snapshot_flags, pending_bytes + raw_chunk)

    return not at_eof
1705
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Combine the components of a tell() position into one integer.

    A cookie means: seek to position, set the decoder flags to
    dec_flags, read bytes_to_feed bytes, feed them into the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters
    of the decoded result.  When every field but position is zero the
    cookie is simply the byte offset.
    """
    # Fields are placed 64 bits apart; need_eof is a single flag bit
    # above them.
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1715
def _unpack_cookie(self, bigint):
    """Split a cookie built by _pack_cookie back into its fields.

    Returns the tuple (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip).
    """
    field_size = 1 << 64
    remainder = bigint
    fields = []
    # Peel off the four 64-bit fields, lowest first; whatever is left
    # above them is the need_eof flag.
    for _ in range(4):
        remainder, low = divmod(remainder, field_size)
        fields.append(low)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, remainder, chars_to_skip
1722
def tell(self):
    """Return the current stream position as an integer cookie.

    When there is no decoder or no snapshot, the result is the byte
    position reported by the underlying buffer.  Otherwise it is a
    value built by _pack_cookie that records a safe start point (a
    byte offset at which the decoder has nothing buffered), the decoder
    flags at that point, and how many bytes to feed and characters to
    skip from there to reach the current logical position.

    Raises UnsupportedOperation if the underlying stream is not
    seekable, and IOError if telling has been disabled by a next() call
    or if the position cannot be reconstructed.  The decoder state is
    restored before returning, whatever the outcome of the search.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                # (the step doubles on each consecutive overshoot)
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The search ran out of candidates (or the initial guess was
            # zero): fall back to starting at the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # The search above clobbered the decoder; put it back as it was.
        decoder.setstate(saved_state)
1821
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at pos.

    When pos is None the position reported by tell() is used.  Returns
    whatever the underlying buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827
def detach(self):
    """Flush, then disconnect and return the underlying buffer.

    Raises ValueError if the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    # Hand the buffer to the caller and forget it in one step.
    detached, self._buffer = self._buffer, None
    return detached
1835
def seek(self, cookie, whence=0):
    """Reposition the stream and return the new position.

    cookie is a value previously returned by tell() (or 0).  whence
    selects the reference point:

    * 0 -- absolute: go back to the safe start point encoded in the
      cookie and replay the decoding needed to reach the exact
      position.  Returns cookie.
    * 1 -- relative to the current position; only a cookie of 0 is
      supported, which re-syncs the underlying buffer with the current
      position.
    * 2 -- relative to the end; only a cookie of 0 is supported.
      Returns the position reported by the underlying buffer.

    Raises ValueError if the file is closed, whence is invalid or the
    cookie is negative; UnsupportedOperation if the stream is not
    seekable or a nonzero relative seek is requested; IOError if the
    logical position cannot be restored.
    """
    def _reset_encoder(position):
        # Reset the encoder (merely useful for proper BOM handling).
        # Applied on every successful seek, including end-relative
        # ones, so the encoder state always matches the new position.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
1907
def read(self, n=None):
    """Read and return at most n characters as a single string.

    When n is None or negative, everything up to EOF is read and
    returned, and the decoded-characters buffer and snapshot are
    cleared.  Otherwise chunks are read until n characters are
    available or EOF is reached.

    n may be any object implementing __index__; it is converted to an
    int before use.  Raises TypeError ("an integer is required") for
    anything else.
    """
    self._checkReadable()
    if n is None:
        n = -1
    else:
        # Validate before touching the decoder, and actually convert:
        # an index-like object need not support the comparisons below.
        try:
            as_index = n.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        n = as_index()
    decoder = self._decoder or self._get_decoder()
    if n < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have n characters to return.
        eof = False
        result = self._get_decoded_chars(n)
        while len(result) < n and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(n - len(result))
        return result
1932
def __next__(self):
    """Return the next line, raising StopIteration at end of input.

    Telling is switched off while iterating; once readline() returns an
    empty result the snapshot is dropped and telling is re-enabled if
    the stream is seekable.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    # Exhausted: restore the ability to tell() before stopping.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1941
def readline(self, limit=None):
    """Read and return one line, including its line ending if present.

    limit, when not None and non-negative, caps the number of
    characters returned.  It may be any object implementing __index__
    (matching what read() accepts); anything else raises TypeError.
    An empty string is returned at end of file.

    Raises ValueError if the file is closed.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    else:
        # Accept index-like objects, not only int instances, and
        # normalise them to a real int for the comparisons below.
        try:
            as_index = limit.__index__
        except AttributeError as err:
            raise TypeError("limit must be an integer") from err
        limit = as_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2029
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    if not self._decoder:
        return None
    return self._decoder.newlines
2033
2034
class StringIO(TextIOWrapper):
    """In-memory text stream built on top of a BytesIO buffer.

    initial_value, when not None, must be a str; it is written to the
    stream, which is then rewound to the start.  newline is passed on
    to the TextIOWrapper constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Normalise str subclasses to a plain str before writing.
        self.write(str(initial_value))
        self.seek(0)

    def getvalue(self):
        """Flush and return the buffer's bytes decoded as text."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Unsupported: detaching doesn't make sense on StringIO."""
        self._unsupported("detach")