blob: 41402b1452707ea538c37d6d36cad1d589a3f387 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01009import errno
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000010# Import _thread instead of threading to reduce startup cost
11try:
12 from _thread import allocate_lock as Lock
13except ImportError:
14 from _dummy_thread import allocate_lock as Lock
15
16import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000017from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
# Fallback buffer size used by open(); open() prefers the file's st_blksize
# whenever it is available and greater than 1.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind for compatibility: makes the built-in exception available as an
# attribute of this module as well.
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000028
29
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines work (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for an
    invalid mode string or an invalid combination of arguments. If an error
    occurs after the raw file has been opened, the stream built so far is
    closed before the exception propagates.
    """
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        # 'U' on its own implies reading.
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # 'result' always names the outermost object constructed so far, so that
    # closing it on failure releases everything beneath it.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Do not leak the open descriptor when a later step fails.
        result.close()
        raise
223
224
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor whose value is the signature line of open()
    followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ=None):
        # The signature line must list every parameter open() accepts,
        # including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
233
class OpenWrapper:
    """Class-shaped stand-in for builtins.open.

    A plain function stored as a class attribute turns into a bound
    method on attribute access (dbm.dumb stores open that way). Calling
    this class simply forwards to open(), so that problem cannot occur.

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # "Instantiating" the wrapper yields whatever open() returns.
        return open(*positional, **keywords)
246
247
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    # The io module does not expose the exception: define a local class
    # deriving from both ValueError and IOError instead.
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000255
256
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an IOError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A relative seek of zero bytes reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed. The
        object is marked closed even when flush() raises; the exception
        still propagates to the caller.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Never leave the object half-open after a failed flush.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int (None means no limit).

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if limit is neither None nor an int.
        """
        # For backwards compatibility, a (slowish) readline().
        can_peek = hasattr(self, "peek")
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            # Without peek() the only safe step is a single byte.
            nbytes = 1
            if can_peek:
                readahead = self.peek(1)
                if readahead:
                    # Read up to and including the newline if one is
                    # visible, otherwise everything that was peeked.
                    nbytes = (readahead.find(b"\n") + 1) or len(readahead)
                    if limit >= 0:
                        # Cap by what is still allowed, not by the total
                        # limit, so the result never exceeds limit bytes.
                        nbytes = min(nbytes, limit - len(res))
            b = self.read(nbytes)
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iterating yields lines until readline() is empty."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines using write().

        No line separators are added. Raises ValueError if the stream
        is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
521
522io.IOBase.register(IOBase)
523
524
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass only has to
    # supply readinto() to get a working read(). readinto() is generally
    # the more efficient of the two.

    # (Offering the reverse as well -- readinto() written in terms of
    # read() -- is deliberately avoided: a subclass implementing neither
    # would then recurse without end.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        None or a negative n reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        size = -1 if n is None else n
        if size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, issuing repeated read() calls."""
        collected = bytearray()
        chunk = self.read(DEFAULT_BUFFER_SIZE)
        while chunk:
            collected += chunk
            chunk = self.read(DEFAULT_BUFFER_SIZE)
        # With nothing collected, hand back the last read() result
        # itself: b'' or None.
        return bytes(collected) if collected else chunk

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
584
585io.RawIOBase.register(RawIOBase)
586from _io import FileIO
587RawIOBase.register(FileIO)
588
589
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        b may be any writable object supporting the buffer protocol
        (bytearray, array.array, memoryview, ...); its size is measured
        in bytes.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment, and TypeError if b is read-only.
        """
        view = b if isinstance(b, memoryview) else memoryview(b)
        # Treat the target as a flat run of bytes whatever its item type.
        view = view.cast('B')
        data = self.read(len(view))
        n = len(data)
        view[:n] = data
        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
675
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000676io.BufferedIOBase.register(BufferedIOBase)
677
678
679class _BufferedIOMixin(BufferedIOBase):
680
681 """A mixin implementation of BufferedIOBase with an underlying raw stream.
682
683 This passes most requests on to the underlying raw stream. It
684 does *not* provide implementations of read(), readinto() or
685 write().
686 """
687
    def __init__(self, raw):
        """Remember raw as the underlying stream (stored in self._raw)."""
        self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000690
691 ### Positioning ###
692
693 def seek(self, pos, whence=0):
694 new_position = self.raw.seek(pos, whence)
695 if new_position < 0:
696 raise IOError("seek() returned an invalid position")
697 return new_position
698
699 def tell(self):
700 pos = self.raw.tell()
701 if pos < 0:
702 raise IOError("tell() returned an invalid position")
703 return pos
704
705 def truncate(self, pos=None):
706 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
707 # and a flush may be necessary to synch both views of the current
708 # file state.
709 self.flush()
710
711 if pos is None:
712 pos = self.tell()
713 # XXX: Should seek() be used, instead of passing the position
714 # XXX directly to truncate?
715 return self.raw.truncate(pos)
716
717 ### Flush and close ###
718
719 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000720 if self.closed:
721 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722 self.raw.flush()
723
724 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000725 if self.raw is not None and not self.closed:
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +0100726 try:
727 # may raise BlockingIOError or BrokenPipeError etc
728 self.flush()
729 finally:
730 self.raw.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000732 def detach(self):
733 if self.raw is None:
734 raise ValueError("raw stream already detached")
735 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000736 raw = self._raw
737 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000738 return raw
739
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740 ### Inquiries ###
741
742 def seekable(self):
743 return self.raw.seekable()
744
745 def readable(self):
746 return self.raw.readable()
747
748 def writable(self):
749 return self.raw.writable()
750
751 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000752 def raw(self):
753 return self._raw
754
755 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000756 def closed(self):
757 return self.raw.closed
758
759 @property
760 def name(self):
761 return self.raw.name
762
763 @property
764 def mode(self):
765 return self.raw.mode
766
Antoine Pitrou243757e2010-11-05 21:15:39 +0000767 def __getstate__(self):
768 raise TypeError("can not serialize a '{0}' object"
769 .format(self.__class__.__name__))
770
Antoine Pitrou716c4442009-05-23 19:04:03 +0000771 def __repr__(self):
772 clsname = self.__class__.__name__
773 try:
774 name = self.name
775 except AttributeError:
776 return "<_pyio.{0}>".format(clsname)
777 else:
778 return "<_pyio.{0} name={1!r}>".format(clsname, name)
779
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780 ### Lower-level APIs ###
781
782 def fileno(self):
783 return self.raw.fileno()
784
785 def isatty(self):
786 return self.raw.isatty()
787
788
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position always starts at 0, even when initial data is given.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        n of None or any negative value reads to the end of the buffer.
        Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the bytes written.

        If the position lies past the end of the buffer, the gap is
        filled with null bytes first.  Raises ValueError on a closed
        stream and TypeError for str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        try:
            # len() counts *items*, which is wrong for buffers whose items
            # are wider than one byte (e.g. array('i')); nbytes is the
            # number of bytes the slice assignment below actually stores.
            with memoryview(b) as view:
                n = view.nbytes
        except TypeError:
            # Not a buffer object (e.g. a list of ints): keep the
            # historical len()-based behaviour for such inputs.
            n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError); 1 is
        relative to the current position and 2 to the end of the buffer,
        both clamped at 0.  Any other whence raises ValueError.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The stream position itself is not changed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
901
902
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def _raw_read_retrying(self, *args):
        """Call self.raw.read(*args), retrying while InterruptedError is raised."""
        while True:
            try:
                return self.raw.read(*args)
            except InterruptedError:
                continue

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None or -1, read until EOF or until read() would
        block. Any n below -1 raises ValueError.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock."""
        # What to return when nothing at all was read: b"" for EOF, or None
        # when the raw stream reported that it has no data right now.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self._raw_read_retrying()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self._raw_read_retrying(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self._raw_read_retrying(to_read)
            if current:
                # Drop the consumed prefix and append the fresh data.
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread read-ahead."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the new position."""
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate for that.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1051
class BufferedWriter(_BufferedIOMixin):

    """Write buffer placed in front of a writable, sequential raw stream.

    Data handed to write() is collected in memory and passed on to the
    raw stream once more than buffer_size bytes are pending, or when the
    buffer is flushed explicitly.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel handed to warnings.warn() for the deprecation warning.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream.

        Raises IOError if raw is not writable and ValueError if
        buffer_size is not positive.  Passing max_buffer_size only
        triggers a DeprecationWarning.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return how many of its bytes were accepted.

        Raises ValueError on a closed stream and TypeError for str.
        BlockingIOError is re-raised with the accepted byte count when
        the buffer had to be cut back to buffer_size.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    excess = len(self._write_buf) - self.buffer_size
                    if excess > 0:
                        # Still above buffer_size: accept only a partial
                        # write and trim the buffer back to the limit.
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror,
                                              accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all pending data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except InterruptedError:
                # Interrupted before anything was reported: try again.
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream could not take anything without blocking.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count > len(self._write_buf) or count < 0:
                raise IOError("write() returned incorrect number of bytes")
            del self._write_buf[:count]

    def tell(self):
        """Return the raw position plus the number of pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1143
1144
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.

        Raises IOError if reader is not readable or writer is not
        writable.  Passing max_buffer_size only triggers a
        DeprecationWarning; the value is not used otherwise.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader half; n of None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close both halves.

        The reader is closed even when closing the writer raises (for
        example because its final flush fails), so the reader's raw
        stream is not leaked.  The writer's exception still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer half is consulted.
        return self.writer.closed
1215
1216
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over one random-access raw stream.

    The constructor takes a seekable stream, raw, as its first argument
    and sets up both read and write buffering on it.  buffer_size
    defaults to DEFAULT_BUFFER_SIZE.
    """

    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        # Initialise both halves explicitly on the same raw stream.
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead, then seek the raw stream."""
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # Seek the raw stream first and clear the read buffer afterwards,
        # so a failing raw seek does not lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, from whichever buffer is in use."""
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: tell()) through the writer half."""
        target = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read; n of None is passed as -1."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Drop any read-ahead, then buffer b for writing."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1286
1287
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character- and line-oriented interface to stream I/O.
    Python strings are immutable, so there is no readinto method.
    There is no public constructor.
    """

    def read(self, n=-1):
        """Read and return at most n characters as a string; n is an int.

        Data is taken from the underlying buffer until n characters have
        been collected or EOF is reached.  A negative or omitted n means
        read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to a newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Detach the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached, the TextIO object is left in
        an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Always None here; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only endings translated while reading count.  Always None here;
        subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Always None here; subclasses should override."""
        return None
1352
# Register the pure-Python class with the official ABC from io.  Registration
# is used instead of real inheritance so the C implementations are not
# inherited.
io.TextIOBase.register(TextIOBase)
1354
1355
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Decoder wrapper used for reading in universal newlines mode.

    It sits on top of another incremental decoder and, when translate is
    true, turns \r\n and \r into \n.  The kinds of newline met so far are
    recorded.  With translate=False it still guarantees that a \r\n pair
    is delivered in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, track newline kinds and optionally translate them."""
        # Decode the input; without a wrapped decoder it is used as is.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Re-attach a \r held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to receive \r\n in a single pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            pending_bytes, inner_flag = b"", 0
        else:
            pending_bytes, inner_flag = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        combined = inner_flag << 1
        if self.pendingcr:
            combined |= 1
        return pending_bytes, combined

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending_bytes, combined = state
        self.pendingcr = bool(combined & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending_bytes, combined >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        # Lookup table indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1439
1440
1441class TextIOWrapper(TextIOBase):
1442
1443 r"""Character and line based layer over a BufferedIOBase object, buffer.
1444
1445 encoding gives the name of the encoding that the stream will be
1446 decoded or encoded with. It defaults to locale.getpreferredencoding.
1447
1448 errors determines the strictness of encoding and decoding (see the
1449 codecs.register) and defaults to "strict".
1450
1451 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1452 handling of line endings. If it is None, universal newlines is
1453 enabled. With this enabled, on input, the lines endings '\n', '\r',
1454 or '\r\n' are translated to '\n' before being returned to the
1455 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001456 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001457 legal values, that newline becomes the newline when the file is read
1458 and it is returned untranslated. On output, '\n' is converted to the
1459 newline.
1460
1461 If line_buffering is True, a call to flush is implied when a call to
1462 write contains a newline character.
1463 """
1464
1465 _CHUNK_SIZE = 2048
1466
1467 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001468 line_buffering=False, write_through=False):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001469 if newline is not None and not isinstance(newline, str):
1470 raise TypeError("illegal newline type: %r" % (type(newline),))
1471 if newline not in (None, "", "\n", "\r", "\r\n"):
1472 raise ValueError("illegal newline value: %r" % (newline,))
1473 if encoding is None:
1474 try:
1475 encoding = os.device_encoding(buffer.fileno())
1476 except (AttributeError, UnsupportedOperation):
1477 pass
1478 if encoding is None:
1479 try:
1480 import locale
1481 except ImportError:
1482 # Importing locale may fail if Python is being built
1483 encoding = "ascii"
1484 else:
1485 encoding = locale.getpreferredencoding()
1486
1487 if not isinstance(encoding, str):
1488 raise ValueError("invalid encoding: %r" % encoding)
1489
1490 if errors is None:
1491 errors = "strict"
1492 else:
1493 if not isinstance(errors, str):
1494 raise ValueError("invalid errors: %r" % errors)
1495
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001496 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001497 self._line_buffering = line_buffering
1498 self._encoding = encoding
1499 self._errors = errors
1500 self._readuniversal = not newline
1501 self._readtranslate = newline is None
1502 self._readnl = newline
1503 self._writetranslate = newline != ''
1504 self._writenl = newline or os.linesep
1505 self._encoder = None
1506 self._decoder = None
1507 self._decoded_chars = '' # buffer for text returned from decoder
1508 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1509 self._snapshot = None # info for reconstructing decoder state
1510 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02001511 self._has_read1 = hasattr(self.buffer, 'read1')
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001512 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001513
Antoine Pitroue4501852009-05-14 18:55:55 +00001514 if self._seekable and self.writable():
1515 position = self.buffer.tell()
1516 if position != 0:
1517 try:
1518 self._get_encoder().setstate(0)
1519 except LookupError:
1520 # Sometimes the encoder doesn't exist
1521 pass
1522
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001523 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1524 # where dec_flags is the second (integer) item of the decoder state
1525 # and next_input is the chunk of input bytes that comes next after the
1526 # snapshot point. We use this to reconstruct decoder states in tell().
1527
1528 # Naming convention:
1529 # - "bytes_..." for integer variables that count input bytes
1530 # - "chars_..." for integer variables that count decoded characters
1531
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001532 def __repr__(self):
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001533 result = "<_pyio.TextIOWrapper"
Antoine Pitrou716c4442009-05-23 19:04:03 +00001534 try:
1535 name = self.name
1536 except AttributeError:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001537 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001538 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001539 result += " name={0!r}".format(name)
1540 try:
1541 mode = self.mode
1542 except AttributeError:
1543 pass
1544 else:
1545 result += " mode={0!r}".format(mode)
1546 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001547
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001548 @property
1549 def encoding(self):
1550 return self._encoding
1551
1552 @property
1553 def errors(self):
1554 return self._errors
1555
1556 @property
1557 def line_buffering(self):
1558 return self._line_buffering
1559
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001560 @property
1561 def buffer(self):
1562 return self._buffer
1563
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 def seekable(self):
1565 return self._seekable
1566
1567 def readable(self):
1568 return self.buffer.readable()
1569
1570 def writable(self):
1571 return self.buffer.writable()
1572
1573 def flush(self):
1574 self.buffer.flush()
1575 self._telling = self._seekable
1576
1577 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00001578 if self.buffer is not None and not self.closed:
1579 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001580 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001581
1582 @property
1583 def closed(self):
1584 return self.buffer.closed
1585
1586 @property
1587 def name(self):
1588 return self.buffer.name
1589
1590 def fileno(self):
1591 return self.buffer.fileno()
1592
1593 def isatty(self):
1594 return self.buffer.isatty()
1595
Raymond Hettinger00fa0392011-01-13 02:52:26 +00001596 def write(self, s):
Raymond Hettingercbb80892011-01-13 18:15:51 +00001597 'Write data, where s is a str'
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598 if self.closed:
1599 raise ValueError("write to closed file")
1600 if not isinstance(s, str):
1601 raise TypeError("can't write %s to text stream" %
1602 s.__class__.__name__)
1603 length = len(s)
1604 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1605 if haslf and self._writetranslate and self._writenl != "\n":
1606 s = s.replace("\n", self._writenl)
1607 encoder = self._encoder or self._get_encoder()
1608 # XXX What if we were just reading?
1609 b = encoder.encode(s)
1610 self.buffer.write(b)
1611 if self._line_buffering and (haslf or "\r" in s):
1612 self.flush()
1613 self._snapshot = None
1614 if self._decoder:
1615 self._decoder.reset()
1616 return length
1617
1618 def _get_encoder(self):
1619 make_encoder = codecs.getincrementalencoder(self._encoding)
1620 self._encoder = make_encoder(self._errors)
1621 return self._encoder
1622
1623 def _get_decoder(self):
1624 make_decoder = codecs.getincrementaldecoder(self._encoding)
1625 decoder = make_decoder(self._errors)
1626 if self._readuniversal:
1627 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1628 self._decoder = decoder
1629 return decoder
1630
1631 # The following three methods implement an ADT for _decoded_chars.
1632 # Text returned from the decoder is buffered here until the client
1633 # requests it by calling our read() or readline() method.
1634 def _set_decoded_chars(self, chars):
1635 """Set the _decoded_chars buffer."""
1636 self._decoded_chars = chars
1637 self._decoded_chars_used = 0
1638
1639 def _get_decoded_chars(self, n=None):
1640 """Advance into the _decoded_chars buffer."""
1641 offset = self._decoded_chars_used
1642 if n is None:
1643 chars = self._decoded_chars[offset:]
1644 else:
1645 chars = self._decoded_chars[offset:offset + n]
1646 self._decoded_chars_used += len(chars)
1647 return chars
1648
1649 def _rewind_decoded_chars(self, n):
1650 """Rewind the _decoded_chars buffer."""
1651 if self._decoded_chars_used < n:
1652 raise AssertionError("rewind decoded_chars out of bounds")
1653 self._decoded_chars_used -= n
1654
1655 def _read_chunk(self):
1656 """
1657 Read and decode the next chunk of data from the BufferedReader.
1658 """
1659
1660 # The return value is True unless EOF was reached. The decoded
1661 # string is placed in self._decoded_chars (replacing its previous
1662 # value). The entire input chunk is sent to the decoder, though
1663 # some of it may remain buffered in the decoder, yet to be
1664 # converted.
1665
1666 if self._decoder is None:
1667 raise ValueError("no decoder")
1668
1669 if self._telling:
1670 # To prepare for tell(), we need to snapshot a point in the
1671 # file where the decoder's input buffer is empty.
1672
1673 dec_buffer, dec_flags = self._decoder.getstate()
1674 # Given this, we know there was a valid snapshot point
1675 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1676
1677 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02001678 if self._has_read1:
1679 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1680 else:
1681 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001682 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001683 decoded_chars = self._decoder.decode(input_chunk, eof)
1684 self._set_decoded_chars(decoded_chars)
1685 if decoded_chars:
1686 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
1687 else:
1688 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001689
1690 if self._telling:
1691 # At the snapshot point, len(dec_buffer) bytes before the read,
1692 # the next input to be decoded is dec_buffer + input_chunk.
1693 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1694
1695 return not eof
1696
1697 def _pack_cookie(self, position, dec_flags=0,
1698 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1699 # The meaning of a tell() cookie is: seek to position, set the
1700 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1701 # into the decoder with need_eof as the EOF flag, then skip
1702 # chars_to_skip characters of the decoded result. For most simple
1703 # decoders, tell() will often just give a byte offset in the file.
1704 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1705 (chars_to_skip<<192) | bool(need_eof)<<256)
1706
1707 def _unpack_cookie(self, bigint):
1708 rest, position = divmod(bigint, 1<<64)
1709 rest, dec_flags = divmod(rest, 1<<64)
1710 rest, bytes_to_feed = divmod(rest, 1<<64)
1711 need_eof, chars_to_skip = divmod(rest, 1<<64)
1712 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1713
1714 def tell(self):
1715 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001716 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 if not self._telling:
1718 raise IOError("telling position disabled by next() call")
1719 self.flush()
1720 position = self.buffer.tell()
1721 decoder = self._decoder
1722 if decoder is None or self._snapshot is None:
1723 if self._decoded_chars:
1724 # This should never happen.
1725 raise AssertionError("pending decoded text")
1726 return position
1727
1728 # Skip backward to the snapshot point (see _read_chunk).
1729 dec_flags, next_input = self._snapshot
1730 position -= len(next_input)
1731
1732 # How many decoded characters have been used up since the snapshot?
1733 chars_to_skip = self._decoded_chars_used
1734 if chars_to_skip == 0:
1735 # We haven't moved from the snapshot point.
1736 return self._pack_cookie(position, dec_flags)
1737
1738 # Starting from the snapshot position, we will walk the decoder
1739 # forward until it gives us enough decoded characters.
1740 saved_state = decoder.getstate()
1741 try:
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001742 # Fast search for an acceptable start point, close to our
1743 # current pos.
1744 # Rationale: calling decoder.decode() has a large overhead
1745 # regardless of chunk size; we want the number of such calls to
1746 # be O(1) in most situations (common decoders, non-crazy input).
1747 # Actually, it will be exactly 1 for fixed-size codecs (all
1748 # 8-bit codecs, also UTF-16 and UTF-32).
1749 skip_bytes = int(self._b2cratio * chars_to_skip)
1750 skip_back = 1
1751 assert skip_bytes <= len(next_input)
1752 while skip_bytes > 0:
1753 decoder.setstate((b'', dec_flags))
1754 # Decode up to temptative start point
1755 n = len(decoder.decode(next_input[:skip_bytes]))
1756 if n <= chars_to_skip:
1757 b, d = decoder.getstate()
1758 if not b:
1759 # Before pos and no bytes buffered in decoder => OK
1760 dec_flags = d
1761 chars_to_skip -= n
1762 break
1763 # Skip back by buffered amount and reset heuristic
1764 skip_bytes -= len(b)
1765 skip_back = 1
1766 else:
1767 # We're too far ahead, skip back a bit
1768 skip_bytes -= skip_back
1769 skip_back = skip_back * 2
1770 else:
1771 skip_bytes = 0
1772 decoder.setstate((b'', dec_flags))
1773
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001774 # Note our initial start point.
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001775 start_pos = position + skip_bytes
1776 start_flags = dec_flags
1777 if chars_to_skip == 0:
1778 # We haven't moved from the start point.
1779 return self._pack_cookie(start_pos, start_flags)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001780
1781 # Feed the decoder one byte at a time. As we go, note the
1782 # nearest "safe start point" before the current location
1783 # (a point where the decoder has nothing buffered, so seek()
1784 # can safely start from there and advance to this location).
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001785 bytes_fed = 0
1786 need_eof = 0
1787 # Chars decoded since `start_pos`
1788 chars_decoded = 0
1789 for i in range(skip_bytes, len(next_input)):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001790 bytes_fed += 1
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001791 chars_decoded += len(decoder.decode(next_input[i:i+1]))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001792 dec_buffer, dec_flags = decoder.getstate()
1793 if not dec_buffer and chars_decoded <= chars_to_skip:
1794 # Decoder buffer is empty, so this is a safe start point.
1795 start_pos += bytes_fed
1796 chars_to_skip -= chars_decoded
1797 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1798 if chars_decoded >= chars_to_skip:
1799 break
1800 else:
1801 # We didn't get enough decoded data; signal EOF to get more.
1802 chars_decoded += len(decoder.decode(b'', final=True))
1803 need_eof = 1
1804 if chars_decoded < chars_to_skip:
1805 raise IOError("can't reconstruct logical file position")
1806
1807 # The returned cookie corresponds to the last safe start point.
1808 return self._pack_cookie(
1809 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1810 finally:
1811 decoder.setstate(saved_state)
1812
1813 def truncate(self, pos=None):
1814 self.flush()
1815 if pos is None:
1816 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001817 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001819 def detach(self):
1820 if self.buffer is None:
1821 raise ValueError("buffer is already detached")
1822 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001823 buffer = self._buffer
1824 self._buffer = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001825 return buffer
1826
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827 def seek(self, cookie, whence=0):
1828 if self.closed:
1829 raise ValueError("tell on closed file")
1830 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001831 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001832 if whence == 1: # seek relative to current position
1833 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001834 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001835 # Seeking to the current position should attempt to
1836 # sync the underlying buffer with the current position.
1837 whence = 0
1838 cookie = self.tell()
1839 if whence == 2: # seek relative to end of file
1840 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001841 raise UnsupportedOperation("can't do nonzero end-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001842 self.flush()
1843 position = self.buffer.seek(0, 2)
1844 self._set_decoded_chars('')
1845 self._snapshot = None
1846 if self._decoder:
1847 self._decoder.reset()
1848 return position
1849 if whence != 0:
Jesus Cea2b47f0a2012-04-26 16:39:35 +02001850 raise ValueError("unsupported whence (%r)" % (whence,))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001851 if cookie < 0:
1852 raise ValueError("negative seek position %r" % (cookie,))
1853 self.flush()
1854
1855 # The strategy of seek() is to go back to the safe start point
1856 # and replay the effect of read(chars_to_skip) from there.
1857 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1858 self._unpack_cookie(cookie)
1859
1860 # Seek back to the safe start point.
1861 self.buffer.seek(start_pos)
1862 self._set_decoded_chars('')
1863 self._snapshot = None
1864
1865 # Restore the decoder to its state from the safe start point.
Benjamin Peterson9363a652009-03-05 00:42:09 +00001866 if cookie == 0 and self._decoder:
1867 self._decoder.reset()
1868 elif self._decoder or dec_flags or chars_to_skip:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869 self._decoder = self._decoder or self._get_decoder()
1870 self._decoder.setstate((b'', dec_flags))
1871 self._snapshot = (dec_flags, b'')
1872
1873 if chars_to_skip:
1874 # Just like _read_chunk, feed the decoder and save a snapshot.
1875 input_chunk = self.buffer.read(bytes_to_feed)
1876 self._set_decoded_chars(
1877 self._decoder.decode(input_chunk, need_eof))
1878 self._snapshot = (dec_flags, input_chunk)
1879
1880 # Skip chars_to_skip of the decoded characters.
1881 if len(self._decoded_chars) < chars_to_skip:
1882 raise IOError("can't restore logical file position")
1883 self._decoded_chars_used = chars_to_skip
1884
Antoine Pitroue4501852009-05-14 18:55:55 +00001885 # Finally, reset the encoder (merely useful for proper BOM handling)
1886 try:
1887 encoder = self._encoder or self._get_encoder()
1888 except LookupError:
1889 # Sometimes the encoder doesn't exist
1890 pass
1891 else:
1892 if cookie != 0:
1893 encoder.setstate(0)
1894 else:
1895 encoder.reset()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001896 return cookie
1897
def read(self, n=None):
    """Read and return at most *n* characters as a str.

    With n None or negative, everything up to EOF is read and decoded,
    and the decoded-chars buffer and snapshot are cleared.  Otherwise
    chunks are read until *n* characters are available or EOF is reached.

    *n* may be any object implementing __index__(); it is converted to an
    int before use.  Raises TypeError when it does not.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        as_index = n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err
    # Actually perform the conversion, so index-like objects do not reach
    # the comparison and slicing arithmetic below unconverted.
    n = as_index()
    if n < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have n characters to return.
        eof = False
        result = self._get_decoded_chars(n)
        while len(result) < n and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(n - len(result))
        return result
1922
1923 def __next__(self):
1924 self._telling = False
1925 line = self.readline()
1926 if not line:
1927 self._snapshot = None
1928 self._telling = self._seekable
1929 raise StopIteration
1930 return line
1931
1932 def readline(self, limit=None):
1933 if self.closed:
1934 raise ValueError("read from closed file")
1935 if limit is None:
1936 limit = -1
Benjamin Petersonb01138a2009-04-24 22:59:52 +00001937 elif not isinstance(limit, int):
1938 raise TypeError("limit must be an integer")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939
1940 # Grab all the decoded text (we will rewind any extra bits later).
1941 line = self._get_decoded_chars()
1942
1943 start = 0
1944 # Make the decoder if it doesn't already exist.
1945 if not self._decoder:
1946 self._get_decoder()
1947
1948 pos = endpos = None
1949 while True:
1950 if self._readtranslate:
1951 # Newlines are already translated, only search for \n
1952 pos = line.find('\n', start)
1953 if pos >= 0:
1954 endpos = pos + 1
1955 break
1956 else:
1957 start = len(line)
1958
1959 elif self._readuniversal:
1960 # Universal newline search. Find any of \r, \r\n, \n
1961 # The decoder ensures that \r\n are not split in two pieces
1962
1963 # In C we'd look for these in parallel of course.
1964 nlpos = line.find("\n", start)
1965 crpos = line.find("\r", start)
1966 if crpos == -1:
1967 if nlpos == -1:
1968 # Nothing found
1969 start = len(line)
1970 else:
1971 # Found \n
1972 endpos = nlpos + 1
1973 break
1974 elif nlpos == -1:
1975 # Found lone \r
1976 endpos = crpos + 1
1977 break
1978 elif nlpos < crpos:
1979 # Found \n
1980 endpos = nlpos + 1
1981 break
1982 elif nlpos == crpos + 1:
1983 # Found \r\n
1984 endpos = crpos + 2
1985 break
1986 else:
1987 # Found \r
1988 endpos = crpos + 1
1989 break
1990 else:
1991 # non-universal
1992 pos = line.find(self._readnl)
1993 if pos >= 0:
1994 endpos = pos + len(self._readnl)
1995 break
1996
1997 if limit >= 0 and len(line) >= limit:
1998 endpos = limit # reached length limit
1999 break
2000
2001 # No line ending seen yet - get more data'
2002 while self._read_chunk():
2003 if self._decoded_chars:
2004 break
2005 if self._decoded_chars:
2006 line += self._get_decoded_chars()
2007 else:
2008 # end of file
2009 self._set_decoded_chars('')
2010 self._snapshot = None
2011 return line
2012
2013 if limit >= 0 and endpos > limit:
2014 endpos = limit # don't exceed limit
2015
2016 # Rewind _decoded_chars to just after the line ending we found.
2017 self._rewind_decoded_chars(len(line) - endpos)
2018 return line[:endpos]
2019
2020 @property
2021 def newlines(self):
2022 return self._decoder.newlines if self._decoder else None
2023
2024
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO, encoded as UTF-8.

        When *initial_value* is not None it must be a str; it is written
        to the stream, which is then rewound to the start.  Raises
        TypeError for any other non-None value.
        """
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str.

        The bytes are run through the stream's own decoder rather than a
        bare bytes.decode(), so the newline translation configured for
        reading also applies here.  The decoder's state, and whether a
        decoder existed at all, are restored before returning, so the
        call does not disturb an in-progress read.
        """
        self.flush()
        existing = self._decoder
        decoder = existing or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)
            # _get_decoder() stores what it creates; put back whatever
            # was there before so getvalue() has no lasting side effect.
            self._decoder = existing

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler used internally is not exposed."""
        return None

    @property
    def encoding(self):
        """Always None: the encoding used internally is not exposed."""
        return None

    def detach(self):
        """Unsupported on StringIO; delegates to ``self._unsupported``."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")