blob: ad5bfcc920f40a65a370e466e1f7a244ff01b1ff [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18# open() uses st_blksize whenever we can
19DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
20
21# NOTE: Base classes defined here are registered with the "official" ABCs
22# defined in io.py. We don't use real inheritance though, because we don't
23# want to inherit the C implementations.
24
25
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the state kept by IOError, an instance stores the
    characters_written value (an int) supplied by whoever raised it.
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the exception.

        errno and strerror are forwarded unchanged to the IOError
        constructor.  characters_written must be an int (default 0) and is
        stored on the instance under the same name.

        Raises TypeError if characters_written is not an int.
        """
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
35
36
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for an
    invalid mode string or an inconsistent combination of arguments.  If a
    failure occurs after the raw file has been opened, the raw file is
    closed again before the exception propagates.
    """
    # Argument type checks.  The offending value is wrapped in a 1-tuple so
    # that a tuple argument is shown verbatim instead of being unpacked by
    # the % operator.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))

    # Reject unknown mode characters and repeated characters.
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        # 'U' on its own implies reading.
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    # From here on a raw file is open.  `result` always names the outermost
    # object built so far, so that closing it on failure releases the raw
    # file as well.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            # Line buffering: use the default chunk size underneath.
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Do not leave the freshly opened file dangling on failure.
        result.close()
        raise
220
221
class DocDescriptor:
    """Descriptor producing the text served as builtins.open.__doc__
    """
    def __get__(self, obj, typ):
        # The call signature comes first, followed by a blank line and the
        # docstring carried by open() itself.
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
230
class OpenWrapper:
    """Stand-in for builtins.open that is safe to store on a class.

    A plain function kept as a class variable becomes a bound method when
    looked up through an instance (dbm.dumb keeps open that way); routing
    the call through a class avoids that.

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the text above: the documentation is computed on access.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper just forwards every argument to open()
        # and hands back whatever open() returned.
        stream = open(*args, **kwargs)
        return stream
243
244
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000245# In normal operation, both `UnsupportedOperation`s should be bound to the
246# same object.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    # io does not export the exception: fall back to a local definition.
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252
253
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for an unsupported operation.

        name is the method name to mention in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to the byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.

        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  The
        object is marked closed even when flush() raises, so a failed
        flush is reported once and later close() calls are no-ops.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self (an instance of IOBase).

        Raises ValueError if the stream is already closed.
        """
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.

        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None is treated like -1 (no limit).

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if limit is neither None nor an int.
        """
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to size the next read: up to and
                # including the first newline, or all of it if there is none.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as the line iterator; ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line; an empty readline() result ends iteration."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Pass each item of the iterable lines to write(), in order.

        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
519
520
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        If n is None or negative, the call is delegated to readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # Let readinto() fill a scratch buffer, then trim it to what was
        # actually read.
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns the accumulated bytes.  If nothing at all could be read,
        the result of the last read() call is returned as is: b"" at EOF,
        or None when read() reported that no data is available.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # b"" or None: keep "no data right now" distinguishable from EOF.
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
580
581
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() may be called without a size argument
    and has no default implementation built on readinto().

    Also, read(), readinto() and write() may raise BlockingIOError when
    the underlying raw stream is in non-blocking mode and not ready;
    unlike their raw counterparts, they will never return None.

    A typical implementation should wrap a RawIOBase implementation
    rather than inherit from one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        With the argument omitted, None, or negative, read and return
        all data until EOF.

        With a positive argument and a raw stream that is not
        'interactive', several raw reads may be issued to satisfy the
        byte count (unless EOF is reached first).  For interactive raw
        streams (XXX and for pipes?), at most one raw read will be
        issued, and a short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes (n is an int) with at most one read()
        system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        As with read(), several reads may be issued to the underlying
        raw stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # array.array targets reject a bytes slice assignment; retry
            # with the data converted to an array, otherwise give up.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
669
670
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the wrapped stream (exposed as self.raw)."""
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the new position.

        Raises IOError if the raw stream reports a negative position.
        """
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Return the raw stream's position.

        Raises IOError if the raw stream reports a negative position.
        """
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; ValueError if this object is closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing if the raw stream has been detached or is already
        closed.  The raw stream is closed even if flush() raises; the
        flush error still propagates to the caller.
        """
        if self.raw is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.raw.close()

    def detach(self):
        """Flush, disconnect the raw stream from this object and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        """Return the raw stream's seekable() result."""
        return self.raw.seekable()

    def readable(self):
        """Return the raw stream's readable() result."""
        return self.raw.readable()

    def writable(self):
        """Return the raw stream's writable() result."""
        return self.raw.writable()

    @property
    def raw(self):
        """The wrapped raw stream, or None once detach() has been called."""
        return self._raw

    @property
    def closed(self):
        """The raw stream's closed attribute."""
        return self.raw.closed

    @property
    def name(self):
        """The raw stream's name attribute."""
        return self.raw.name

    @property
    def mode(self):
        """The raw stream's mode attribute."""
        return self.raw.mode

    def __getstate__(self):
        """Refuse pickling: always raises TypeError."""
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        """Return '<_pyio.Class name=...>', omitting name if unavailable."""
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno() result."""
        return self.raw.fileno()

    def isatty(self):
        """Return the raw stream's isatty() result."""
        return self.raw.isatty()
776
777
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError on a closed stream, like the other accessors.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        n of None or negative means "everything up to the end".  Returns
        b"" when the position is at or past the end of the buffer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the number written.

        Raises TypeError for str.  Writing past the end first pads the gap
        with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError); 1 is
        relative to the current position and 2 to the end, both clamped
        at 0.  pos must support __index__, else TypeError.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Convert once so index-like objects behave as real integers in
        # the comparisons and arithmetic below.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns the truncation position.  The stream position itself is
        not moved.  pos must support __index__ and be non-negative.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
890
891
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock."""
        nodata_val = b""
        # A raw read() yields b"" at EOF and None when it would block.
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and drop the read buffer; return new position.

        For whence == 1 the offset is corrected by the amount of
        read-ahead, because the raw stream is ahead of the logical position.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1025
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel passed to warnings.warn for the max_buffer_size warning.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream.

        Raises IOError if raw is not writable and ValueError if
        buffer_size is not positive.  max_buffer_size is deprecated and
        only triggers a DeprecationWarning.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError on a closed stream and TypeError for str.  If
        the raw stream would block, BlockingIOError is raised with
        characters_written set to the number of bytes actually accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw tell())."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold the write lock.

        Raises BlockingIOError, with characters_written set to the bytes
        flushed so far, when the raw stream cannot accept more data.
        """
        import errno

        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n is None:
                    # A non-blocking raw stream returns None when not a
                    # single byte could be written; report it through the
                    # same path as a raised BlockingIOError instead of
                    # failing on the integer comparison below.
                    raise BlockingIOError(
                        errno.EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1121
1122
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  Raises IOError if reader
        is not readable or writer is not writable.  max_buffer_size is
        deprecated and only triggers a DeprecationWarning.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Delegate to the reader's readable()."""
        return self.reader.readable()

    def writable(self):
        """Delegate to the writer's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises, so a
        failing writer cannot leak the reader; the error still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The writer's closed state (the reader is not consulted)."""
        return self.writer.closed
1193
1194
1195class BufferedRandom(BufferedWriter, BufferedReader):
1196
1197 """A buffered interface to random access streams.
1198
1199 The constructor creates a reader and writer for a seekable stream,
1200 raw, given in the first argument. If the buffer_size is omitted it
Benjamin Peterson59406a92009-03-26 17:10:29 +00001201 defaults to DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001202 """
1203
Benjamin Peterson59406a92009-03-26 17:10:29 +00001204 _warning_stack_offset = 3
1205
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001206 def __init__(self, raw,
1207 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1208 raw._checkSeekable()
1209 BufferedReader.__init__(self, raw, buffer_size)
1210 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1211
1212 def seek(self, pos, whence=0):
1213 if not (0 <= whence <= 2):
1214 raise ValueError("invalid whence")
1215 self.flush()
1216 if self._read_buf:
1217 # Undo read ahead.
1218 with self._read_lock:
1219 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1220 # First do the raw seek, then empty the read buffer, so that
1221 # if the raw seek fails, we don't lose buffered data forever.
1222 pos = self.raw.seek(pos, whence)
1223 with self._read_lock:
1224 self._reset_read_buf()
1225 if pos < 0:
1226 raise IOError("seek() returned invalid position")
1227 return pos
1228
1229 def tell(self):
1230 if self._write_buf:
1231 return BufferedWriter.tell(self)
1232 else:
1233 return BufferedReader.tell(self)
1234
1235 def truncate(self, pos=None):
1236 if pos is None:
1237 pos = self.tell()
1238 # Use seek to flush the read buffer.
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001239 return BufferedWriter.truncate(self, pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001240
1241 def read(self, n=None):
1242 if n is None:
1243 n = -1
1244 self.flush()
1245 return BufferedReader.read(self, n)
1246
1247 def readinto(self, b):
1248 self.flush()
1249 return BufferedReader.readinto(self, b)
1250
1251 def peek(self, n=0):
1252 self.flush()
1253 return BufferedReader.peek(self, n)
1254
1255 def read1(self, n):
1256 self.flush()
1257 return BufferedReader.read1(self, n)
1258
1259 def write(self, b):
1260 if self._read_buf:
1261 # Undo readahead
1262 with self._read_lock:
1263 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1264 self._reset_read_buf()
1265 return BufferedWriter.write(self, b)
1266
1267
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented view of stream I/O.  There is
    no readinto method because Python's character strings are immutable,
    and there is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream, where n is an int.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n reads until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here.  Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; None here.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; None here.

        Subclasses should override."""
        return None

# Make instances pass isinstance() checks against the official io ABC.
io.TextIOBase.register(TextIOBase)
1334
1335
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags accumulated in ``seennl``.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, track newline kinds and optionally translate them."""
        # decode input (with the eventual \r from a previous pass)
        inner = self.decoder
        text = input if inner is None else inner.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating data, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            pending_bytes, flags = b"", 0
        else:
            pending_bytes, flags = self.decoder.getstate()
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending_bytes, flags

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        pending_bytes, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending_bytes, flags >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1419
1420
1421class TextIOWrapper(TextIOBase):
1422
1423 r"""Character and line based layer over a BufferedIOBase object, buffer.
1424
1425 encoding gives the name of the encoding that the stream will be
1426 decoded or encoded with. It defaults to locale.getpreferredencoding.
1427
1428 errors determines the strictness of encoding and decoding (see the
1429 codecs.register) and defaults to "strict".
1430
1431 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1432 handling of line endings. If it is None, universal newlines is
1433 enabled. With this enabled, on input, the lines endings '\n', '\r',
1434 or '\r\n' are translated to '\n' before being returned to the
1435 caller. Conversely, on output, '\n' is translated to the system
1436 default line seperator, os.linesep. If newline is any other of its
1437 legal values, that newline becomes the newline when the file is read
1438 and it is returned untranslated. On output, '\n' is converted to the
1439 newline.
1440
1441 If line_buffering is True, a call to flush is implied when a call to
1442 write contains a newline character.
1443 """
1444
1445 _CHUNK_SIZE = 2048
1446
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False):
    """Validate the arguments and set up the wrapper state around buffer.

    Raises TypeError for a non-str newline, and ValueError for an
    unsupported newline value or a non-str encoding or errors.  With no
    encoding given, the device encoding of the buffer's file descriptor
    is tried first, then the locale's preferred encoding ("ascii" if the
    locale module cannot be imported).
    """
    if not (newline is None or isinstance(newline, str)):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._b2cratio = 0.0

    if self._seekable and self.writable():
        if self.buffer.tell() != 0:
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
1501
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001502 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1503 # where dec_flags is the second (integer) item of the decoder state
1504 # and next_input is the chunk of input bytes that comes next after the
1505 # snapshot point. We use this to reconstruct decoder states in tell().
1506
1507 # Naming convention:
1508 # - "bytes_..." for integer variables that count input bytes
1509 # - "chars_..." for integer variables that count decoded characters
1510
def __repr__(self):
    """Return ``<_pyio.TextIOWrapper [name=...] [mode=...] encoding=...>``.

    name and mode are each omitted when reading the attribute raises
    AttributeError.
    """
    pieces = ["<_pyio.TextIOWrapper"]
    optional = (("name", " name={0!r}"), ("mode", " mode={0!r}"))
    for attr, template in optional:
        try:
            value = getattr(self, attr)
        except AttributeError:
            continue
        pieces.append(template.format(value))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001526
@property
def encoding(self):
    """The encoding name stored in ``_encoding``."""
    value = self._encoding
    return value
1530
@property
def errors(self):
    """The error-handling setting stored in ``_errors``."""
    value = self._errors
    return value
1534
@property
def line_buffering(self):
    """The line-buffering flag stored in ``_line_buffering``."""
    value = self._line_buffering
    return value
1538
@property
def buffer(self):
    """The underlying buffer object stored in ``_buffer``."""
    value = self._buffer
    return value
1542
def seekable(self):
    """Return the seekability flag stored in ``_seekable``."""
    flag = self._seekable
    return flag
1545
def readable(self):
    """Return whatever the underlying buffer's readable() reports."""
    underlying = self.buffer
    return underlying.readable()
1548
def writable(self):
    """Return whatever the underlying buffer's writable() reports."""
    underlying = self.buffer
    return underlying.writable()
1551
def flush(self):
    """Flush the underlying buffer, then set ``_telling`` to ``_seekable``."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
1555
def close(self):
    """Flush pending data, then close the underlying buffer.

    Does nothing when the buffer is None or already closed.  The buffer
    is closed even if flush() raises, so a failed flush cannot leak it;
    the flush error still propagates to the caller.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001560
@property
def closed(self):
    """The ``closed`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
1564
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
1568
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    underlying = self.buffer
    return underlying.fileno()
1571
def isatty(self):
    """Return whatever the underlying buffer's isatty() returns."""
    underlying = self.buffer
    return underlying.isatty()
1574
def write(self, s):
    """Encode the str s, write it to the buffer, return len(s) as passed in.

    Raises ValueError on a closed stream and TypeError for non-str input.
    "\n" is replaced by the configured newline when write translation is
    on.  With line buffering, a newline or "\r" in the data triggers a
    flush.  The tell() snapshot is dropped and any decoder is reset.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    original_length = len(s)
    saw_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return original_length
1596
def _get_encoder(self):
    """Create, store in ``_encoder`` and return an incremental encoder."""
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = encoder_factory(self._errors)
    self._encoder = encoder
    return self._encoder
1601
def _get_decoder(self):
    """Create, store in ``_decoder`` and return an incremental decoder.

    When ``_readuniversal`` is set, the codec decoder is wrapped in an
    IncrementalNewlineDecoder configured with ``_readtranslate``.
    """
    decoder_factory = codecs.getincrementaldecoder(self._encoding)
    result = decoder_factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
1609
1610 # The following three methods implement an ADT for _decoded_chars.
1611 # Text returned from the decoder is buffered here until the client
1612 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and rewind its read offset to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
1617
def _get_decoded_chars(self, n=None):
    """Consume and return up to n chars from the _decoded_chars buffer.

    With n of None, everything remaining is returned.  The read offset
    advances by the number of characters actually returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
1627
def _rewind_decoded_chars(self, n):
    """Move the _decoded_chars read offset back by n characters.

    Raises AssertionError if n exceeds the number of characters consumed.
    """
    consumed = self._decoded_chars_used
    if consumed < n:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
1633
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the underlying buffer.

    Returns True unless EOF was reached.  The decoded string replaces the
    previous contents of _decoded_chars.  The whole input chunk is handed
    to the decoder, which may keep part of it buffered, not yet converted.
    Raises ValueError if no decoder is set.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.  The decoder
        # state tells us there was such a point len(pending) bytes ago,
        # with decoder state (b'', flags).
        pending, flags = decoder.getstate()

    # Read a chunk, decode it, and put the result in self._decoded_chars.
    raw_chunk = self.buffer.read1(self._CHUNK_SIZE)
    at_eof = not raw_chunk
    text = self._decoder.decode(raw_chunk, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk (0.0 when nothing decoded).
    self._b2cratio = (
        len(raw_chunk) / len(self._decoded_chars) if text else 0.0)

    if self._telling:
        # At the snapshot point, len(pending) bytes before the read,
        # the next input to be decoded is pending + raw_chunk.
        self._snapshot = (flags, pending + raw_chunk)

    return not at_eof
1672
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the tell() state into a single integer cookie.

    The meaning of a cookie is: seek to position, set the decoder flags
    to dec_flags, read bytes_to_feed bytes, feed them into the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters of
    the decoded result.  Fields are placed at 64-bit intervals, with
    need_eof as a single bit above them.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1682
def _unpack_cookie(self, bigint):
    """Split a cookie into its fields.

    Returns (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip).
    The first three and chars_to_skip are successive 64-bit fields taken
    from the low end; need_eof is whatever remains above them.
    """
    fields = []
    remainder = bigint
    for _ in range(4):
        remainder, field = divmod(remainder, 1 << 64)
        fields.append(field)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, remainder, chars_to_skip
1689
def tell(self):
    """Return an opaque integer cookie for the current stream position.

    When there is no decoder or no snapshot, the cookie is simply the
    byte position reported by the underlying buffer.  Otherwise the
    position is reconstructed by replaying the bytes of the last
    snapshot through the decoder, and the result is encoded with
    _pack_cookie().

    Raises UnsupportedOperation if the stream is not seekable, and
    IOError if telling has been disabled by __next__() or if the
    logical position cannot be reconstructed.  The decoder state is
    always restored before returning.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        # The first guess scales chars_to_skip by the bytes-per-char
        # ratio stored in self._b2cratio.
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                # (the step doubles on each consecutive overshoot)
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The fast search found no usable start point; fall back to
            # the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # Undo every decoder state change made during the replay above.
        decoder.setstate(saved_state)
1788
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at `pos`.

    When `pos` is None the value of self.tell() is used instead.
    Returns whatever the underlying buffer's truncate() returns.
    """
    self.flush()
    size = self.tell() if pos is None else pos
    return self.buffer.truncate(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794
def detach(self):
    """Flush pending data, then disconnect and return the buffer.

    After this call self._buffer is None.  Raises ValueError when the
    buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1802
def seek(self, cookie, whence=0):
    """Move the stream to the position described by `cookie`.

    whence=0: `cookie` must be 0 or a value returned by tell(); the
        underlying buffer is moved to the cookie's safe start point and
        the decoder is replayed up to the logical position.
    whence=1: only a zero offset is supported; re-syncs the underlying
        buffer with the current logical position.
    whence=2: only a zero offset is supported; moves to the end of the
        underlying buffer and returns its byte position.

    Returns the new position (`cookie` for whence 0 and 1).

    Raises ValueError on a closed file, an invalid `whence` or a
    negative cookie; UnsupportedOperation if the stream is not seekable
    or for non-zero relative seeks; IOError if the logical position
    cannot be restored.
    """
    def _reset_encoder(position):
        # Reset the encoder (merely useful for proper BOM handling):
        # at position 0 it starts from scratch, anywhere else it is put
        # in state 0.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        # The encoder must follow the new position too, otherwise a
        # write after seeking to the end of a non-empty stream could
        # start with a fresh BOM.
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder (merely useful for proper BOM handling)
    _reset_encoder(cookie)
    return cookie
1874
def read(self, n=None):
    """Read and return at most `n` characters as a single str.

    If `n` is None or negative, everything up to EOF is read and the
    pending decoded text and snapshot are cleared.  Otherwise chunks
    are read until `n` characters are available or EOF is reached, so
    the result may be shorter than `n`.

    `n` may be any object implementing __index__(); it is converted to
    an int before use.  Raises TypeError("an integer is required") when
    `n` has no __index__ method.
    """
    self._checkReadable()
    if n is None:
        n = -1
    else:
        # Validate and normalise the count before touching the decoder,
        # so a bad argument has no side effects.
        try:
            to_index = n.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        n = to_index()
    decoder = self._decoder or self._get_decoder()
    if n < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have n characters to return.
        eof = False
        result = self._get_decoded_chars(n)
        while len(result) < n and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(n - len(result))
        return result
1899
def __next__(self):
    """Return the next line, raising StopIteration at end of input.

    Position tracking (self._telling) is switched off while iterating.
    Once readline() returns an empty result, the snapshot is dropped
    and self._telling is set back to self._seekable before
    StopIteration is raised.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    # Exhausted: drop the snapshot and restore the telling flag.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1908
def readline(self, limit=None):
    """Read and return one line from the stream.

    The line includes its terminator when one was found.  If `limit`
    is given and non-negative, at most `limit` characters are returned.
    An empty string is returned when the stream is already at EOF.

    Which terminators are recognised depends on self._readtranslate,
    self._readuniversal and self._readnl.

    Raises ValueError if the file is closed and TypeError if `limit`
    is neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Offset in `line` from which the next newline search starts, so
    # text that was already scanned is not searched again.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # NOTE(review): this search does not use `start`, so the
            # whole accumulated line is rescanned on every pass.
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data
        # Keep reading chunks until one yields decoded text or
        # _read_chunk() reports that nothing more was read.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
1996
@property
def newlines(self):
    """The decoder's `newlines` attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2000
2001
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO and load initial_value.

        Raises TypeError if initial_value is neither a str nor None.
        """
        # The text is stored as UTF-8 in a BytesIO.  "surrogatepass"
        # lets lone surrogates round-trip through that storage, so any
        # str can be held; "strict" would raise UnicodeEncodeError.
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="surrogatepass",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str.

        The stored bytes are run through the stream's own decoder, so
        the result gets the same newline handling as text returned by
        read().  The decoder state is saved and restored around the
        call, leaving the read position untouched.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        # The internal error handler is an implementation detail.
        return None

    @property
    def encoding(self):
        # The internal UTF-8 storage is an implementation detail.
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")