"""
Python implementation of the io module.
"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Antoine Pitrou707ce822011-02-25 21:24:11 +000017from errno import EINTR
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
# Fallback buffer size, in bytes.  open() uses the file's st_blksize
# whenever we can determine it, and this value otherwise.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
21
# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py.  We don't use real inheritance though, because we don't
# want to inherit the C implementations.
25
26
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    errno and strerror are passed on to IOError.  characters_written is
    the number of characters already written to the stream before it
    blocked; it must be an int and defaults to 0.
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Validate first so that a bad argument never leaves behind a
        # half-initialised exception object.
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        super().__init__(errno, strerror)
        self.characters_written = characters_written
36
37
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    A TypeError is raised when an argument has the wrong type, and a
    ValueError when the mode string or the combination of arguments is
    invalid.  If an error occurs after the underlying file has been
    opened, the partially built stream is closed before the error
    propagates.
    """
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    # Reject unknown mode characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    # `result` always names the outermost stream built so far, so that a
    # failure in any later step can close everything opened up to that point
    # instead of leaking the raw file.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except BaseException:
        result.close()
        raise
221
222
class DocDescriptor:
    """Descriptor that builds the text served as builtins.open.__doc__.

    The result is the call signature of open() followed by a blank line
    and the docstring of open() itself.
    """

    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)")
        return "".join((signature, "\n\n", open.__doc__))
231
class OpenWrapper:
    """Callable stand-in for builtins.open.

    A plain function stored as a class variable turns into a bound method
    when looked up through an instance (dbm.dumb stores open that way).
    Wrapping open in a class whose __new__ forwards the call avoids that.

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper simply opens and returns the stream.
        stream = open(*args, **kwargs)
        return stream
244
245
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  Only when the io module does not provide one is a local
# equivalent defined.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253
254
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for the method called name."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length relative seek reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  If the
        flush fails, the error propagates but the object is still marked
        as closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Without this, a failing flush() would leave the object
                # permanently "open" and every later close() (including
                # the one issued by __del__) would retry the flush.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable.

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable.

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable.

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed.

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        UnsupportedOperation is raised if the IO object does not use a
        file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None or a negative value means no limit.
        A TypeError is raised for any other non-int limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in a single read() call.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iterating yields lines until readline() is empty."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines using write().

        No line separators are added.  Raises ValueError if the stream is
        closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
520
521
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        If n is None or negative, read until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        # Drop the unused tail of the scratch buffer.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the bytes read.  If nothing could be read, the result of
        the last read() is returned unchanged: b'' on EOF, or None if the
        object is set not to block and has no data to read.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # Nothing was accumulated: keep b'' (EOF) distinguishable from
        # None (no data available right now).
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
581
582
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument and has no default implementation built on readinto().

    read(), readinto() and write() may also raise BlockingIOError when
    the underlying raw stream is in non-blocking mode and not ready;
    unlike the raw versions, they never return None.

    A typical implementation wraps a RawIOBase implementation rather
    than inheriting from one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        An omitted, None or negative argument means: read and return
        everything up to EOF.

        For a positive argument on a raw stream that is not
        'interactive', several raw reads may be issued to satisfy the
        byte count (unless EOF comes first).  For interactive raw
        streams (XXX and for pipes?), at most one raw read is issued,
        and a short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes (n is an int) with at most one read()
        system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        As with read(), several reads may be issued to the underlying
        raw stream, unless it is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # array.array refuses bytes in a slice assignment; retry with
            # the data converted to an array.  Anything else is an error.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is left in an
        unusable state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
670
671
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        # The wrapped raw stream; reset to None by detach().
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the new absolute position.

        Raises IOError if the raw stream reports a negative position.
        """
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Return the raw stream's position.

        Raises IOError if the raw stream reports a negative position.
        """
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        """Truncate the raw stream to pos bytes (default: current position).

        Returns whatever the raw stream's truncate() returns.
        """
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream.  Raises ValueError if the file is closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing if the raw stream was detached or is already closed.
        The raw stream is closed even if the flush raises; the flush error
        then propagates to the caller.
        """
        if self.raw is not None and not self.closed:
            try:
                self.flush()
            finally:
                # A failed flush must not leak the raw stream.
                self.raw.close()

    def detach(self):
        """Flush, then disconnect and return the raw stream.

        Raises ValueError if the raw stream has already been detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        """Return the raw stream's seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Return the raw stream's readable()."""
        return self.raw.readable()

    def writable(self):
        """Return the raw stream's writable()."""
        return self.raw.writable()

    @property
    def raw(self):
        """The underlying raw stream, or None after detach()."""
        return self._raw

    @property
    def closed(self):
        """The raw stream's closed flag."""
        return self.raw.closed

    @property
    def name(self):
        """The raw stream's name."""
        return self.raw.name

    @property
    def mode(self):
        """The raw stream's mode."""
        return self.raw.mode

    def __getstate__(self):
        """Refuse pickling by always raising TypeError."""
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            # No name available: fall back to the short form.
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Return the raw stream's isatty()."""
        return self.raw.isatty()
777
778
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0 even when initial data is supplied.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a shallow copy of the instance dict.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes starting at the current position.

        None or a negative n reads up to the end of the buffer.  Returns
        b"" when the position is at or beyond the end.  Raises ValueError
        if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Raises ValueError if the
        stream is closed and TypeError if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # Measure in bytes, not items: len() of e.g. array('H') counts
        # 2-byte elements and would corrupt both the overwrite span and
        # the resulting position.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is absolute (negative pos raises ValueError); 1 is
        relative to the current position and 2 to the end of the buffer,
        both clamped at 0.  Raises TypeError if pos has no __index__ and
        ValueError for a closed stream or any other whence.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The stream position itself is left unchanged.
        Raises TypeError if pos has no __index__ and ValueError if pos is
        negative or the stream is closed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
891
892
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def _raw_read_retrying(self, *args):
        """Call self.raw.read(*args), retrying while it fails with EINTR.

        Any other IOError propagates unchanged.
        """
        while True:
            try:
                return self.raw.read(*args)
            except IOError as e:
                if e.errno != EINTR:
                    raise

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold self._read_lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self._raw_read_retrying()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self._raw_read_retrying(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold self._read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self._raw_read_retrying(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread read-ahead."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position.

        Raises ValueError unless 0 <= whence <= 2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1043
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # Stack level handed to warnings.warn() for the max_buffer_size
    # deprecation warning issued by __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable stream raw.

        max_buffer_size is deprecated and otherwise ignored; passing it
        only triggers a DeprecationWarning.  Raises IOError if raw is not
        writable and ValueError if buffer_size is not positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        The buffer is flushed to the raw stream whenever it holds more
        than buffer_size bytes.  Raises ValueError on a closed stream,
        TypeError for str input, and BlockingIOError (characters_written
        set to the number of bytes of b that were accepted) when flushing
        would block and the buffer is over its limit.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        Returns whatever the raw stream's truncate() returns.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold self._write_lock.

        Raises ValueError on a closed stream, IOError if the raw stream
        reports an impossible byte count, and BlockingIOError (with
        characters_written set to the bytes flushed by this call) when
        the raw stream cannot accept more data without blocking.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                try:
                    n = self.raw.write(self._write_buf)
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                if n is None:
                    # A raw write() returning None means nothing could be
                    # written without blocking; report that explicitly
                    # rather than failing on the comparison below.
                    from errno import EAGAIN
                    raise BlockingIOError(
                        EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus unflushed bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush, then seek the raw stream and return the new position.

        Raises ValueError unless 0 <= whence <= 2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1144
1145
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  max_buffer_size is
        deprecated and otherwise ignored.  Raises IOError if reader is
        not readable or writer is not writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Delegate to the reader's readable()."""
        return self.reader.readable()

    def writable(self):
        """Delegate to the writer's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises, so a
        failure on the write side cannot leak the read side; the writer's
        error still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed flag of the writer side."""
        return self.writer.closed
1216
1217
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # One more than BufferedWriter's value, because this class's own
    # __init__ sits between the caller and BufferedWriter.__init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialise both the reader and the writer half on the same raw."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo read-ahead, then seek the raw stream.

        Returns the new position.  Raises ValueError unless
        0 <= whence <= 2 and IOError if the raw seek reports a negative
        position.
        """
        whence_ok = 0 <= whence <= 2
        if not whence_ok:
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the position, accounting for whichever buffer is in use."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: tell()) via BufferedWriter.truncate()."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read like BufferedReader.read()."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write like BufferedWriter.write()."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1289
1290
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented view of stream I/O.  No
    readinto method exists because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream, where n is an int.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n reads until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """None in this base class; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        None in this base class; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None in this base class; subclasses should override."""
        return None
1355
# Register the pure-Python class with the "official" ABC from io instead of
# inheriting from it, so that the C implementation is not inherited.
io.TextIOBase.register(TextIOBase)
1357
1358
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder (or None, in which case the input is
    used as-is) and translates \r\n and \r into \n when translate is true.
    It also records which kinds of newline have been seen.  With
    translate=False it still makes sure a \r\n sequence is returned in
    one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input, holding back a trailing \\r until the next call."""
        # Decode the input, then re-attach a \r held back by an earlier call.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # A trailing \r is held back even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline this chunk contained.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r flag."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our own bit.
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and the pending \\r; reset the wrapped decoder."""
        self.pendingcr = False
        self.seennl = 0
        inner = self.decoder
        if inner is not None:
            inner.reset()

    # Bit flags combined in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1442
1443
1444class TextIOWrapper(TextIOBase):
1445
1446 r"""Character and line based layer over a BufferedIOBase object, buffer.
1447
1448 encoding gives the name of the encoding that the stream will be
1449 decoded or encoded with. It defaults to locale.getpreferredencoding.
1450
1451 errors determines the strictness of encoding and decoding (see the
1452 codecs.register) and defaults to "strict".
1453
1454 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1455 handling of line endings. If it is None, universal newlines is
1456 enabled. With this enabled, on input, the lines endings '\n', '\r',
1457 or '\r\n' are translated to '\n' before being returned to the
1458 caller. Conversely, on output, '\n' is translated to the system
1459 default line separator, os.linesep. If newline is any other of its
1460 legal values, that newline becomes the newline when the file is read
1461 and it is returned untranslated. On output, '\n' is converted to the
1462 newline.
1463
1464 If line_buffering is True, a call to flush is implied when a call to
1465 write contains a newline character.
1466 """
1467
1468 _CHUNK_SIZE = 2048
1469
1470 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1471 line_buffering=False):
1472 if newline is not None and not isinstance(newline, str):
1473 raise TypeError("illegal newline type: %r" % (type(newline),))
1474 if newline not in (None, "", "\n", "\r", "\r\n"):
1475 raise ValueError("illegal newline value: %r" % (newline,))
1476 if encoding is None:
1477 try:
1478 encoding = os.device_encoding(buffer.fileno())
1479 except (AttributeError, UnsupportedOperation):
1480 pass
1481 if encoding is None:
1482 try:
1483 import locale
1484 except ImportError:
1485 # Importing locale may fail if Python is being built
1486 encoding = "ascii"
1487 else:
1488 encoding = locale.getpreferredencoding()
1489
1490 if not isinstance(encoding, str):
1491 raise ValueError("invalid encoding: %r" % encoding)
1492
1493 if errors is None:
1494 errors = "strict"
1495 else:
1496 if not isinstance(errors, str):
1497 raise ValueError("invalid errors: %r" % errors)
1498
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001499 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500 self._line_buffering = line_buffering
1501 self._encoding = encoding
1502 self._errors = errors
1503 self._readuniversal = not newline
1504 self._readtranslate = newline is None
1505 self._readnl = newline
1506 self._writetranslate = newline != ''
1507 self._writenl = newline or os.linesep
1508 self._encoder = None
1509 self._decoder = None
1510 self._decoded_chars = '' # buffer for text returned from decoder
1511 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1512 self._snapshot = None # info for reconstructing decoder state
1513 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001514 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001515
Antoine Pitroue4501852009-05-14 18:55:55 +00001516 if self._seekable and self.writable():
1517 position = self.buffer.tell()
1518 if position != 0:
1519 try:
1520 self._get_encoder().setstate(0)
1521 except LookupError:
1522 # Sometimes the encoder doesn't exist
1523 pass
1524
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001525 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1526 # where dec_flags is the second (integer) item of the decoder state
1527 # and next_input is the chunk of input bytes that comes next after the
1528 # snapshot point. We use this to reconstruct decoder states in tell().
1529
1530 # Naming convention:
1531 # - "bytes_..." for integer variables that count input bytes
1532 # - "chars_..." for integer variables that count decoded characters
1533
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001534 def __repr__(self):
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001535 result = "<_pyio.TextIOWrapper"
Antoine Pitrou716c4442009-05-23 19:04:03 +00001536 try:
1537 name = self.name
1538 except AttributeError:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001539 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001540 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001541 result += " name={0!r}".format(name)
1542 try:
1543 mode = self.mode
1544 except AttributeError:
1545 pass
1546 else:
1547 result += " mode={0!r}".format(mode)
1548 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001549
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 @property
1551 def encoding(self):
1552 return self._encoding
1553
1554 @property
1555 def errors(self):
1556 return self._errors
1557
1558 @property
1559 def line_buffering(self):
1560 return self._line_buffering
1561
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001562 @property
1563 def buffer(self):
1564 return self._buffer
1565
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001566 def seekable(self):
1567 return self._seekable
1568
1569 def readable(self):
1570 return self.buffer.readable()
1571
1572 def writable(self):
1573 return self.buffer.writable()
1574
1575 def flush(self):
1576 self.buffer.flush()
1577 self._telling = self._seekable
1578
1579 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00001580 if self.buffer is not None and not self.closed:
1581 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001582 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001583
1584 @property
1585 def closed(self):
1586 return self.buffer.closed
1587
1588 @property
1589 def name(self):
1590 return self.buffer.name
1591
1592 def fileno(self):
1593 return self.buffer.fileno()
1594
1595 def isatty(self):
1596 return self.buffer.isatty()
1597
Raymond Hettinger00fa0392011-01-13 02:52:26 +00001598 def write(self, s):
Raymond Hettingercbb80892011-01-13 18:15:51 +00001599 'Write data, where s is a str'
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001600 if self.closed:
1601 raise ValueError("write to closed file")
1602 if not isinstance(s, str):
1603 raise TypeError("can't write %s to text stream" %
1604 s.__class__.__name__)
1605 length = len(s)
1606 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1607 if haslf and self._writetranslate and self._writenl != "\n":
1608 s = s.replace("\n", self._writenl)
1609 encoder = self._encoder or self._get_encoder()
1610 # XXX What if we were just reading?
1611 b = encoder.encode(s)
1612 self.buffer.write(b)
1613 if self._line_buffering and (haslf or "\r" in s):
1614 self.flush()
1615 self._snapshot = None
1616 if self._decoder:
1617 self._decoder.reset()
1618 return length
1619
1620 def _get_encoder(self):
1621 make_encoder = codecs.getincrementalencoder(self._encoding)
1622 self._encoder = make_encoder(self._errors)
1623 return self._encoder
1624
1625 def _get_decoder(self):
1626 make_decoder = codecs.getincrementaldecoder(self._encoding)
1627 decoder = make_decoder(self._errors)
1628 if self._readuniversal:
1629 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1630 self._decoder = decoder
1631 return decoder
1632
1633 # The following three methods implement an ADT for _decoded_chars.
1634 # Text returned from the decoder is buffered here until the client
1635 # requests it by calling our read() or readline() method.
1636 def _set_decoded_chars(self, chars):
1637 """Set the _decoded_chars buffer."""
1638 self._decoded_chars = chars
1639 self._decoded_chars_used = 0
1640
1641 def _get_decoded_chars(self, n=None):
1642 """Advance into the _decoded_chars buffer."""
1643 offset = self._decoded_chars_used
1644 if n is None:
1645 chars = self._decoded_chars[offset:]
1646 else:
1647 chars = self._decoded_chars[offset:offset + n]
1648 self._decoded_chars_used += len(chars)
1649 return chars
1650
1651 def _rewind_decoded_chars(self, n):
1652 """Rewind the _decoded_chars buffer."""
1653 if self._decoded_chars_used < n:
1654 raise AssertionError("rewind decoded_chars out of bounds")
1655 self._decoded_chars_used -= n
1656
1657 def _read_chunk(self):
1658 """
1659 Read and decode the next chunk of data from the BufferedReader.
1660 """
1661
1662 # The return value is True unless EOF was reached. The decoded
1663 # string is placed in self._decoded_chars (replacing its previous
1664 # value). The entire input chunk is sent to the decoder, though
1665 # some of it may remain buffered in the decoder, yet to be
1666 # converted.
1667
1668 if self._decoder is None:
1669 raise ValueError("no decoder")
1670
1671 if self._telling:
1672 # To prepare for tell(), we need to snapshot a point in the
1673 # file where the decoder's input buffer is empty.
1674
1675 dec_buffer, dec_flags = self._decoder.getstate()
1676 # Given this, we know there was a valid snapshot point
1677 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1678
1679 # Read a chunk, decode it, and put the result in self._decoded_chars.
1680 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1681 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001682 decoded_chars = self._decoder.decode(input_chunk, eof)
1683 self._set_decoded_chars(decoded_chars)
1684 if decoded_chars:
1685 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
1686 else:
1687 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688
1689 if self._telling:
1690 # At the snapshot point, len(dec_buffer) bytes before the read,
1691 # the next input to be decoded is dec_buffer + input_chunk.
1692 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1693
1694 return not eof
1695
1696 def _pack_cookie(self, position, dec_flags=0,
1697 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1698 # The meaning of a tell() cookie is: seek to position, set the
1699 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1700 # into the decoder with need_eof as the EOF flag, then skip
1701 # chars_to_skip characters of the decoded result. For most simple
1702 # decoders, tell() will often just give a byte offset in the file.
1703 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1704 (chars_to_skip<<192) | bool(need_eof)<<256)
1705
1706 def _unpack_cookie(self, bigint):
1707 rest, position = divmod(bigint, 1<<64)
1708 rest, dec_flags = divmod(rest, 1<<64)
1709 rest, bytes_to_feed = divmod(rest, 1<<64)
1710 need_eof, chars_to_skip = divmod(rest, 1<<64)
1711 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1712
    def tell(self):
        """Return an opaque cookie for the current position in the text.

        When there is no decoder or no snapshot, the result is simply the
        byte position reported by the underlying buffer.  Otherwise the
        method replays the snapshot bytes through the decoder to find the
        nearest point where the decoder holds no buffered input, and packs
        that point plus the replay instructions with _pack_cookie().

        The decoder's state is saved before the replay and restored in a
        finally clause, so the replay does not disturb subsequent reads.

        Raises UnsupportedOperation if the stream is not seekable, and
        IOError if telling has been disabled by iteration or if the
        position cannot be reconstructed from the snapshot.
        """
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Fast search for an acceptable start point, close to our
            # current pos.
            # Rationale: calling decoder.decode() has a large overhead
            # regardless of chunk size; we want the number of such calls to
            # be O(1) in most situations (common decoders, non-crazy input).
            # Actually, it will be exactly 1 for fixed-size codecs (all
            # 8-bit codecs, also UTF-16 and UTF-32).
            skip_bytes = int(self._b2cratio * chars_to_skip)
            skip_back = 1
            assert skip_bytes <= len(next_input)
            while skip_bytes > 0:
                decoder.setstate((b'', dec_flags))
                # Decode up to tentative start point
                n = len(decoder.decode(next_input[:skip_bytes]))
                if n <= chars_to_skip:
                    b, d = decoder.getstate()
                    if not b:
                        # Before pos and no bytes buffered in decoder => OK
                        dec_flags = d
                        chars_to_skip -= n
                        break
                    # Skip back by buffered amount and reset heuristic
                    skip_bytes -= len(b)
                    skip_back = 1
                else:
                    # We're too far ahead, skip back a bit
                    # (the step doubles on every consecutive overshoot).
                    skip_bytes -= skip_back
                    skip_back = skip_back * 2
            else:
                # The loop ran out without finding a usable point: fall
                # back to the snapshot position itself.
                skip_bytes = 0
                decoder.setstate((b'', dec_flags))

            # Note our initial start point.
            start_pos = position + skip_bytes
            start_flags = dec_flags
            if chars_to_skip == 0:
                # We haven't moved from the start point.
                return self._pack_cookie(start_pos, start_flags)

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            bytes_fed = 0
            need_eof = 0
            # Chars decoded since `start_pos`
            chars_decoded = 0
            for i in range(skip_bytes, len(next_input)):
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_input[i:i+1]))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Undo the effect of the replay on the live decoder.
            decoder.setstate(saved_state)
1811
1812 def truncate(self, pos=None):
1813 self.flush()
1814 if pos is None:
1815 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001816 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001817
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001818 def detach(self):
1819 if self.buffer is None:
1820 raise ValueError("buffer is already detached")
1821 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001822 buffer = self._buffer
1823 self._buffer = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001824 return buffer
1825
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001826 def seek(self, cookie, whence=0):
1827 if self.closed:
1828 raise ValueError("tell on closed file")
1829 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001830 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001831 if whence == 1: # seek relative to current position
1832 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001833 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001834 # Seeking to the current position should attempt to
1835 # sync the underlying buffer with the current position.
1836 whence = 0
1837 cookie = self.tell()
1838 if whence == 2: # seek relative to end of file
1839 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001840 raise UnsupportedOperation("can't do nonzero end-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001841 self.flush()
1842 position = self.buffer.seek(0, 2)
1843 self._set_decoded_chars('')
1844 self._snapshot = None
1845 if self._decoder:
1846 self._decoder.reset()
1847 return position
1848 if whence != 0:
1849 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1850 (whence,))
1851 if cookie < 0:
1852 raise ValueError("negative seek position %r" % (cookie,))
1853 self.flush()
1854
1855 # The strategy of seek() is to go back to the safe start point
1856 # and replay the effect of read(chars_to_skip) from there.
1857 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1858 self._unpack_cookie(cookie)
1859
1860 # Seek back to the safe start point.
1861 self.buffer.seek(start_pos)
1862 self._set_decoded_chars('')
1863 self._snapshot = None
1864
1865 # Restore the decoder to its state from the safe start point.
Benjamin Peterson9363a652009-03-05 00:42:09 +00001866 if cookie == 0 and self._decoder:
1867 self._decoder.reset()
1868 elif self._decoder or dec_flags or chars_to_skip:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869 self._decoder = self._decoder or self._get_decoder()
1870 self._decoder.setstate((b'', dec_flags))
1871 self._snapshot = (dec_flags, b'')
1872
1873 if chars_to_skip:
1874 # Just like _read_chunk, feed the decoder and save a snapshot.
1875 input_chunk = self.buffer.read(bytes_to_feed)
1876 self._set_decoded_chars(
1877 self._decoder.decode(input_chunk, need_eof))
1878 self._snapshot = (dec_flags, input_chunk)
1879
1880 # Skip chars_to_skip of the decoded characters.
1881 if len(self._decoded_chars) < chars_to_skip:
1882 raise IOError("can't restore logical file position")
1883 self._decoded_chars_used = chars_to_skip
1884
Antoine Pitroue4501852009-05-14 18:55:55 +00001885 # Finally, reset the encoder (merely useful for proper BOM handling)
1886 try:
1887 encoder = self._encoder or self._get_encoder()
1888 except LookupError:
1889 # Sometimes the encoder doesn't exist
1890 pass
1891 else:
1892 if cookie != 0:
1893 encoder.setstate(0)
1894 else:
1895 encoder.reset()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001896 return cookie
1897
1898 def read(self, n=None):
Benjamin Petersona1b49012009-03-31 23:11:32 +00001899 self._checkReadable()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001900 if n is None:
1901 n = -1
1902 decoder = self._decoder or self._get_decoder()
Florent Xiclunab14930c2010-03-13 15:26:44 +00001903 try:
1904 n.__index__
1905 except AttributeError as err:
1906 raise TypeError("an integer is required") from err
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001907 if n < 0:
1908 # Read everything.
1909 result = (self._get_decoded_chars() +
1910 decoder.decode(self.buffer.read(), final=True))
1911 self._set_decoded_chars('')
1912 self._snapshot = None
1913 return result
1914 else:
1915 # Keep reading chunks until we have n characters to return.
1916 eof = False
1917 result = self._get_decoded_chars(n)
1918 while len(result) < n and not eof:
1919 eof = not self._read_chunk()
1920 result += self._get_decoded_chars(n - len(result))
1921 return result
1922
1923 def __next__(self):
1924 self._telling = False
1925 line = self.readline()
1926 if not line:
1927 self._snapshot = None
1928 self._telling = self._seekable
1929 raise StopIteration
1930 return line
1931
    def readline(self, limit=None):
        """Read and return one line from the stream.

        limit, when given and non-negative, caps the number of characters
        returned; None (the default) means no cap.  The line terminator
        that is searched for depends on the newline mode: only '\\n' when
        newlines are translated on read, any of '\\r', '\\r\\n' or '\\n'
        in universal mode, and self._readnl otherwise.

        At end of file, whatever text has been accumulated (possibly the
        empty string) is returned and the snapshot is dropped.

        Raises ValueError if the file is closed and TypeError if limit is
        neither None nor an int.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        # Index from which the next newline search starts, so that text
        # already scanned is not searched again.
        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                # (this search always starts from the beginning of line)
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # (keep reading until a chunk yields some text or EOF is hit).
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]
2019
2020 @property
2021 def newlines(self):
2022 return self._decoder.newlines if self._decoder else None
2023
2024
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO, encoded as UTF-8.

        initial_value must be a str or None; when it is a str it is
        written to the stream and the position is rewound to the start.
        Raises TypeError for any other type.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a string.

        The bytes are run through the stream's own decoder, so the result
        follows the same newline handling as read().  The decoder state
        seen by subsequent reads is left unchanged.
        """
        self.flush()
        raw = self.buffer.getvalue()
        decoder = self._decoder
        if decoder is None:
            # Nothing has been read yet: decode with a decoder created for
            # this call only, then drop it again so that the stream is
            # left exactly as it was.
            try:
                return self._get_decoder().decode(raw, final=True)
            finally:
                self._decoder = None
        # A decoder is already in use by read()/readline(): borrow it from
        # a clean state and put its previous state back afterwards.
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(raw, final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler is an implementation detail."""
        return None

    @property
    def encoding(self):
        """Always None: the encoding is an implementation detail."""
        return None

    def detach(self):
        """Unsupported on StringIO; delegates to self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")