blob: c06f4b877b6fffc2c487270d425c0603e95d69ef [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01009import errno
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000010# Import _thread instead of threading to reduce startup cost
11try:
12 from _thread import allocate_lock as Lock
13except ImportError:
14 from _dummy_thread import allocate_lock as Lock
15
16import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000017from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
# Values of ``whence`` accepted by seek(): the hardwired 0, 1 and 2, plus
# SEEK_HOLE and SEEK_DATA when the os module provides them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000033
34
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation ---
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode string parsing ---
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Build the layered stream: raw -> buffered -> text ---
    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # `result` always names the outermost object built so far, so that a
    # failure at any later step can close everything that was opened.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except BaseException:
        # Don't leak the already-opened file when setup fails part-way.
        result.close()
        raise
228
229
class DocDescriptor:
    """Helper for builtins.open.__doc__

    A descriptor whose __get__ returns the call signature of open(),
    a blank line, and then open()'s own docstring.
    """
    def __get__(self, obj, typ):
        # The signature line must list every parameter open() accepts,
        # including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
238
class OpenWrapper:
    """Wrapper for builtins.open

    Calling this class never yields an OpenWrapper instance: __new__
    forwards its arguments to open() and hands back whatever open()
    returns.

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        stream = open(*args, **kwargs)
        return stream
251
252
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  The exception class exported by the io module is reused when
# it exists; only if that attribute is missing is a stand-in defined here,
# deriving from both ValueError and IOError.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000260
261
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations.

        name is the method name reported in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  If
        flush() raises, the exception propagates but the object is still
        marked as closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark closed even when flush() fails, so the failing
                # flush is not retried by later close() / __del__ calls.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, look ahead for the newline so that
            # each read() can fetch up to the end of the line at once.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # Without peek(), read one byte at a time.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol.  Returns self; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.  Raises ValueError if closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
528
529
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a subclass only has to supply
    # readinto() as its primitive to get a working read().  In general,
    # readinto() can be more efficient than read().

    # (Providing the reverse as well -- readinto() in terms of read() --
    # is tempting, but a subclass implementing neither would then recurse
    # forever.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        When n is None or negative, everything up to EOF is read via
        readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None or n < 0:
            return self.readall()
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            return None
        # Drop the unused tail of the scratch buffer.
        del buf[count:]
        return bytes(buf)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # With nothing collected, hand back the last read() result
        # unchanged: b'' or None.
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
593
594
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b, a writable buffer object.

        b may be any object exposing a writable buffer (bytearray,
        array.array of any typecode, memoryview, ...); its capacity is
        measured in bytes.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # View the target as a flat sequence of unsigned bytes so that the
        # requested size and the slice assignment are both in bytes,
        # whatever the item type of b is.  The views are released on exit
        # so b is not left locked by a lingering buffer export.
        with memoryview(b) as view, view.cast('B') as byte_view:
            data = self.read(len(byte_view))
            n = len(data)
            byte_view[:n] = data
        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
682
683
class _BufferedIOMixin(BufferedIOBase):

    """Mixin implementing BufferedIOBase on top of an underlying raw stream.

    Most requests are simply forwarded to the raw stream.  read(),
    readinto() and write() are deliberately *not* implemented here.
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream; raise IOError if it reports a negative position."""
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise IOError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream position; raise IOError if it is negative."""
        result = self.raw.tell()
        if result < 0:
            raise IOError("tell() returned an invalid position")
        return result

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Buffered I/O is being mixed with lower-level I/O here, so a flush
        # may be needed to bring both views of the file state in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raise ValueError if already closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or already closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, forget the raw stream and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        """The wrapped raw stream (None after detach())."""
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Serialization is refused outright.
        message = "can not serialize a '{0}' object".format(
            self.__class__.__name__)
        raise TypeError(message)

    def __repr__(self):
        cls_name = self.__class__.__name__
        try:
            stream_name = self.name
        except AttributeError:
            # No name available: fall back to the short form.
            return "<_pyio.{0}>".format(cls_name)
        return "<_pyio.{0} name={1!r}>".format(cls_name, stream_name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
792
793
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The contents are kept in a private bytearray; the position
        starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        None or a negative n reads everything that remains.  Returns
        b"" when the position is at or past the end of the buffer.
        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n=-1):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Raises ValueError if the
        stream is closed and TypeError if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # len(b) counts items, not bytes, for buffers with multi-byte
        # items (e.g. array('i')); nbytes is the real size of the data.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is absolute (negative pos raises ValueError); 1 is
        relative to the current position and 2 to the end, both clamped
        at 0.  Any other whence raises ValueError.  Raises TypeError if
        pos has no __index__, ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The stream position itself is not moved.  Raises
        TypeError if pos has no __index__, ValueError if pos is negative
        or the stream is closed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
906
907
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.

        Raises ValueError if n is less than -1.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock."""
        nodata_val = b""
        # A raw read yields b"" at EOF and None when it would block.
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except InterruptedError:
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except InterruptedError:
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            # Retry only when the raw read is interrupted.
            while True:
                try:
                    current = self.raw.read(to_read)
                except InterruptedError:
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call.

        Raises ValueError if n is negative; returns b"" for n == 0.
        """
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread read-ahead."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and drop the read buffer.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate for it.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1058
class BufferedWriter(_BufferedIOMixin):

    """Buffer in front of a writeable, sequential RawIO object.

    The constructor wraps the given writeable raw stream.  When
    buffer_size is not supplied, DEFAULT_BUFFER_SIZE is used.
    """

    # stacklevel passed to warnings.warn() for the deprecation warning.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw for writing.

        Raises IOError if raw is not writable and ValueError if
        buffer_size is not positive.  Passing max_buffer_size only
        triggers a DeprecationWarning.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return how many of its bytes were accepted.

        Raises ValueError on a closed stream, TypeError for str input,
        and BlockingIOError (carrying the accepted count) when the raw
        stream would block and the buffer had to be cut back.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit, so pre-flush.  (This may raise
                # BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still over buffer_size: accept only a partial write
                    # and cut the buffer back to the limit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except InterruptedError:
                # Interrupted raw write: just try again.
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream accepted nothing without blocking.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if not 0 <= count <= len(self._write_buf):
                raise IOError("write() returned incorrect number of bytes")
            del self._write_buf[:count]

    def tell(self):
        """Return the raw position plus the amount of unflushed data."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1152
1153
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  Raises IOError if reader
        is not readable or writer is not writable.  Passing
        max_buffer_size only triggers a DeprecationWarning.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close both halves of the pair.

        The reader is closed even if closing the writer raises (for
        example when its final flush fails), so one failing side cannot
        leave the other open.  The writer's exception still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer side is consulted.
        return self.writer.closed
1224
1225
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered interface to a random access stream.

    The constructor sets up both the reader and the writer machinery on
    the seekable stream raw given as first argument.  buffer_size
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # stacklevel passed to warnings.warn() by BufferedWriter.__init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, then seek the raw stream.

        Raises ValueError for a whence outside valid_seek_flags and
        IOError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # Seek the raw stream first and only then empty the read buffer,
        # so a failing raw seek does not lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the position, using the writer's view if data is pending."""
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: tell()) via BufferedWriter.truncate."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read; None is passed on as -1."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write through BufferedWriter.write."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1297
1298
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line based interface to stream I/O.  There
    is no readinto method, because Python's character strings are
    immutable, and there is no public constructor.

    The I/O methods below only pass the operation name to
    self._unsupported(); the properties return None.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Reads from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n means read
        until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in
        an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None
1363
# Register the pure-Python class with the "official" ABC from io instead of
# inheriting from it, so the C implementation is not inherited.
io.TextIOBase.register(TextIOBase)
1365
1366
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder (or None, in which case the input
    is used as-is) and, when translate is true, turns \r\n and \r into
    \n.  The kinds of newline encountered are recorded.  With
    translate=False the text is not rewritten, but a \r\n sequence is
    still guaranteed to be returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, track newline kinds and optionally translate them."""
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf_count = text.count('\r\n')
        cr_count = text.count('\r') - crlf_count
        lf_count = text.count('\n') - crlf_count
        seen = 0
        if lf_count:
            seen |= self._LF
        if cr_count:
            seen |= self._CR
        if crlf_count:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if crlf_count:
                text = text.replace("\r\n", "\n")
            if cr_count:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); the low bit of flag is the pending-\\r bit."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Make room for our own bit below the wrapped decoder's flags.
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a str, or a tuple of strs."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1450
1451
1452class TextIOWrapper(TextIOBase):
1453
1454 r"""Character and line based layer over a BufferedIOBase object, buffer.
1455
1456 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001457 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001458
1459 errors determines the strictness of encoding and decoding (see the
1460 codecs.register) and defaults to "strict".
1461
1462 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1463 handling of line endings. If it is None, universal newlines is
1464 enabled. With this enabled, on input, the lines endings '\n', '\r',
1465 or '\r\n' are translated to '\n' before being returned to the
1466 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001467 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001468 legal values, that newline becomes the newline when the file is read
1469 and it is returned untranslated. On output, '\n' is converted to the
1470 newline.
1471
1472 If line_buffering is True, a call to flush is implied when a call to
1473 write contains a newline character.
1474 """
1475
1476 _CHUNK_SIZE = 2048
1477
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Validate the arguments and set up the wrapper's state.

    Raises TypeError if newline is neither None nor a str, and
    ValueError for an illegal newline value or a non-str encoding or
    errors.  When encoding is None it is taken from
    os.device_encoding(), then locale.getpreferredencoding(False), with
    "ascii" as the fallback if locale cannot be imported.

    write_through is accepted but not used in this method.
    """
    if newline is not None and not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors

    # Newline handling flags derived from the newline argument.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._b2cratio = 0.0

    if self._seekable and self.writable():
        if self.buffer.tell() != 0:
            # The buffer is not at its start: put the encoder in state 0.
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
1533
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1535 # where dec_flags is the second (integer) item of the decoder state
1536 # and next_input is the chunk of input bytes that comes next after the
1537 # snapshot point. We use this to reconstruct decoder states in tell().
1538
1539 # Naming convention:
1540 # - "bytes_..." for integer variables that count input bytes
1541 # - "chars_..." for integer variables that count decoded characters
1542
def __repr__(self):
    """Return a description with name and mode (when available) and encoding."""
    parts = ["<_pyio.TextIOWrapper"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        parts.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        parts.append(" mode={0!r}".format(mode))
    parts.append(" encoding={0!r}>".format(self.encoding))
    return "".join(parts)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001558
@property
def encoding(self):
    """The encoding name stored on this wrapper."""
    return self._encoding
1562
@property
def errors(self):
    """The error-handling setting stored on this wrapper."""
    return self._errors
1566
@property
def line_buffering(self):
    """The line_buffering flag stored on this wrapper."""
    return self._line_buffering
1570
@property
def buffer(self):
    """The underlying buffer object stored on this wrapper."""
    return self._buffer
1574
def seekable(self):
    """Return the stored seekability flag (self._seekable)."""
    return self._seekable
1577
def readable(self):
    """Return whatever the underlying buffer reports for readable()."""
    return self.buffer.readable()
1580
def writable(self):
    """Return whatever the underlying buffer reports for writable()."""
    return self.buffer.writable()
1583
def flush(self):
    """Flush the underlying buffer and reset _telling to _seekable."""
    self.buffer.flush()
    self._telling = self._seekable
1587
def close(self):
    """Flush pending data and close the underlying buffer.

    Does nothing when self.buffer is None or the stream is already
    closed.  The buffer is closed even if flush() raises, so a failed
    flush cannot leave the underlying stream open; the flush error
    still propagates.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592
@property
def closed(self):
    """The closed flag reported by the underlying buffer."""
    return self.buffer.closed
1596
@property
def name(self):
    """The name attribute of the underlying buffer."""
    return self.buffer.name
1600
def fileno(self):
    """Return whatever the underlying buffer reports for fileno()."""
    return self.buffer.fileno()
1603
def isatty(self):
    """Return whatever the underlying buffer reports for isatty()."""
    return self.buffer.isatty()
1606
def write(self, s):
    """Encode the str s and write it to the underlying buffer.

    Returns the length of s as passed in, i.e. before any newline
    translation.  Raises ValueError on a closed stream and TypeError if
    s is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only look for "\n" when translation or line buffering needs it.
    saw_lf = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_lf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_lf or "\r" in s):
        self.flush()
    # A write invalidates the read-side snapshot and decoder state.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
1628
def _get_encoder(self):
    """Build an incremental encoder for ``self._encoding``.

    The encoder is constructed with ``self._errors``, cached on
    ``self._encoder`` and returned.
    """
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = self._encoder = encoder_factory(self._errors)
    return encoder
1633
def _get_decoder(self):
    """Build an incremental decoder for ``self._encoding``.

    The decoder is constructed with ``self._errors``.  When
    ``self._readuniversal`` is set it is wrapped in an
    IncrementalNewlineDecoder configured with ``self._readtranslate``.
    The result is cached on ``self._decoder`` and returned.
    """
    decoder = codecs.getincrementaldecoder(self._encoding)(self._errors)
    if self._readuniversal:
        decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
    self._decoder = decoder
    return decoder
1641
# The following three methods implement an ADT for _decoded_chars.
# Text returned from the decoder is buffered here until the client
# requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer with *chars* and mark none of it as used."""
    self._decoded_chars_used = 0
    self._decoded_chars = chars
1649
def _get_decoded_chars(self, n=None):
    """Consume and return text from the _decoded_chars buffer.

    With *n* of None everything not yet used is returned; otherwise the
    slice of at most *n* characters starting at the current offset.
    The used-characters offset advances by the length of the result.
    """
    begin = self._decoded_chars_used
    end = None if n is None else begin + n
    chars = self._decoded_chars[begin:end]
    self._decoded_chars_used = begin + len(chars)
    return chars
1659
def _rewind_decoded_chars(self, n):
    """Give back the last *n* consumed characters of the _decoded_chars buffer.

    Raises AssertionError when *n* exceeds the number of characters
    consumed so far.
    """
    consumed = self._decoded_chars_used
    if n > consumed:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = consumed - n
1665
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the underlying buffer.

    Returns True unless EOF was reached.  The decoded string replaces
    the previous content of self._decoded_chars.  The whole input chunk
    is handed to the decoder, although part of it may stay buffered
    inside the decoder, yet to be converted.

    Raises ValueError when no decoder has been created.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    telling = self._telling
    if telling:
        # To prepare for tell(), snapshot a point in the file where the
        # decoder's input buffer is empty: there was a valid snapshot
        # point len(pending_bytes) bytes ago with decoder state
        # (b'', flags).
        pending_bytes, flags = decoder.getstate()

    # Read a chunk, decode it, and store the result in _decoded_chars.
    fetch = self.buffer.read1 if self._has_read1 else self.buffer.read
    raw = fetch(self._CHUNK_SIZE)
    at_eof = not raw
    text = decoder.decode(raw, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk, used by tell() as a hint.
    self._b2cratio = len(raw) / len(self._decoded_chars) if text else 0.0

    if telling:
        # At the snapshot point, len(pending_bytes) bytes before the
        # read, the next input to be decoded is pending_bytes + raw.
        self._snapshot = (flags, pending_bytes + raw)

    return not at_eof
1707
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Combine the components of a tell() cookie into one integer.

    The meaning of a tell() cookie is: seek to position, set the decoder
    flags to dec_flags, read bytes_to_feed bytes, feed them into the
    decoder with need_eof as the EOF flag, then skip chars_to_skip
    characters of the decoded result.  For most simple decoders, tell()
    will often just give a byte offset in the file.

    Layout, lowest bits first: position, dec_flags at bit 64,
    bytes_to_feed at bit 128, chars_to_skip at bit 192, and the
    need_eof flag (coerced to bool) at bit 256.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1717
def _unpack_cookie(self, bigint):
    """Split a tell() cookie into its components.

    Returns the tuple (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip).  The first, second, third and fourth 64-bit groups
    (lowest first) are position, dec_flags, bytes_to_feed and
    chars_to_skip; whatever remains above them is need_eof.
    """
    word = 1 << 64
    remaining = bigint
    groups = []
    for _ in range(4):
        remaining, low = divmod(remaining, word)
        groups.append(low)
    position, dec_flags, bytes_to_feed, chars_to_skip = groups
    return position, dec_flags, bytes_to_feed, remaining, chars_to_skip
1724
def tell(self):
    """Return a cookie describing the current logical stream position.

    The result is either the plain byte position of the underlying
    buffer (when there is no decoder or no snapshot) or a value built by
    _pack_cookie() that seek() can later replay.

    Raises UnsupportedOperation if the stream is not seekable, and
    IOError if telling is currently disabled or the position cannot be
    reconstructed from the snapshot.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    # The decoder is shared with the read path, so its state is saved
    # here and restored in the finally clause below.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                # (the step doubles on each consecutive overshoot)
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ran out without finding a usable point: start
            # over from the snapshot itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        decoder.setstate(saved_state)
1823
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer.

    When *pos* is None the value returned by tell() is used as the
    size.  Returns whatever the buffer's truncate() returns.
    """
    self.flush()
    size = self.tell() if pos is None else pos
    return self.buffer.truncate(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001829
def detach(self):
    """Flush, disconnect the underlying buffer and return it.

    Afterwards ``self._buffer`` is None.  Raises ValueError if the
    buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1837
def seek(self, cookie, whence=0):
    """Move the stream to the position described by *cookie*.

    whence=0: *cookie* must be a non-negative value previously returned
              by tell() (or 0); the decoder state recorded in the cookie
              is replayed.
    whence=1: only a zero offset is supported; it re-syncs the stream to
              its current tell() position.
    whence=2: only a zero offset is supported; it moves to the end of
              the underlying buffer and returns that position.

    Returns the cookie (or, for whence=2, the buffer's end position).
    Raises ValueError for a closed stream, an unsupported whence or a
    negative cookie; UnsupportedOperation for a non-seekable stream or a
    nonzero relative offset; IOError if the position cannot be restored.
    """
    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1: # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2: # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder (merely useful for proper BOM handling)
    try:
        encoder = self._encoder or self._get_encoder()
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
    else:
        if cookie != 0:
            encoder.setstate(0)
        else:
            encoder.reset()
    return cookie
1908
def read(self, n=None):
    """Read and return at most *n* characters as a str.

    With *n* of None or a negative value, everything up to EOF is read:
    the pending decoded text plus the decoded remainder of the
    underlying buffer; the decoded-chars buffer and snapshot are then
    cleared.  Otherwise chunks are read until *n* characters are
    available or EOF is hit.

    *n* may be any object implementing ``__index__``; it is converted to
    an int before use.  Raises TypeError("an integer is required") for
    objects without ``__index__``.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        to_index = n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err
    # Actually convert: merely having __index__ does not make the object
    # comparable with ints or usable in slice arithmetic.
    n = to_index()
    if n < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have n characters to return.
        eof = False
        result = self._get_decoded_chars(n)
        while len(result) < n and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(n - len(result))
        return result
1933
def __next__(self):
    """Return the next line, raising StopIteration at end of input.

    tell() is disabled (``_telling`` set to False) before the line is
    read.  When readline() returns an empty result, the snapshot is
    cleared and ``_telling`` is restored from ``_seekable`` before
    StopIteration is raised.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1942
def readline(self, limit=None):
    """Read and return one line, including its line ending if one was found.

    *limit*, when given and non-negative, caps the number of characters
    returned; None means no cap.  At end of file the text accumulated so
    far (possibly empty) is returned.

    Raises ValueError if the stream is closed and TypeError if *limit*
    is neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index from which the next newline search starts, so text that was
    # already scanned is not searched again.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # (searches the whole accumulated line for self._readnl)
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data
        # Keep reading chunks until one yields decoded text or EOF is hit.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2030
@property
def newlines(self):
    """Return the decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2034
2035
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO and load *initial_value*.

        *initial_value* must be a str or None; any other type raises
        TypeError.  A str value is written to the stream, which is then
        rewound to position 0.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            # The value is known to be a str here, so no conversion is
            # needed before writing it.
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Flush and return the buffer's whole content decoded to str."""
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler is not exposed on StringIO."""
        return None

    @property
    def encoding(self):
        """Always None: the internal encoding is not exposed on StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported via self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")