blob: a0c4b25977019b91cdba52084f76beeb6386f434 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
# Whence values accepted by seek().  The three portable ones are hardwired;
# the sparse-file extensions are added when the platform's os module
# exposes them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}
22
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000023# open() uses st_blksize whenever we can
24DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
25
26# NOTE: Base classes defined here are registered with the "official" ABCs
27# defined in io.py. We don't use real inheritance though, because we don't
28# want to inherit the C implementations.
29
Antoine Pitrou6b4883d2011-10-12 02:54:14 +020030# Rebind for compatibility
31BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000032
33
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation (nothing has been opened yet) ---
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode string validation ---
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # From here on a file is open.  `result` always names the outermost
    # object built so far, so that a failure in any later step can close
    # the whole stack instead of leaking the raw file.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size reported; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Clean up and re-raise unchanged, whatever the failure was.
        result.close()
        raise
227
228
class DocDescriptor:
    """Helper for builtins.open.__doc__

    A descriptor whose value is a one-line call signature for open()
    followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ):
        # The signature line mirrors the parameters of open(), including
        # the opener argument.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
237
class OpenWrapper:
    """Class-shaped stand-in for builtins.open

    A plain function stored as a class variable turns into a bound
    method on attribute access (dbm.dumb stores open that way).  Calling
    this class instead simply forwards to open() and hands back whatever
    open() returns, so no binding takes place.

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # No OpenWrapper instance is ever created: the stream produced by
        # open() is returned directly.
        stream = open(*positional, **keywords)
        return stream
250
251
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000252# In normal operation, both `UnsupportedOperation`s should be bound to the
253# same object.
# Reuse the io module's exception class when it provides one; otherwise
# define an equivalent class locally.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259
260
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() fails.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        UnsupportedOperation is raised if the IO object does not use a
        file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if limit is neither None nor an int.
        """
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use peek() to find how many bytes can be read in one go
                # without running past the end of the line.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Clamp to what is still allowed, not to the full
                    # limit: earlier iterations may already have consumed
                    # part of it.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol.  Returns self; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration when readline()
        returns an empty result."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.  Raises ValueError if the stream is
        closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
527
528io.IOBase.register(IOBase)
529
530
class RawIOBase(IOBase):

    """Abstract foundation for raw (unbuffered) binary streams."""

    # read() is layered on top of readinto(), so a subclass that wants a
    # working read() only has to supply readinto() as its primitive.  In
    # general, readinto() can be more efficient than read().

    # (Providing the reverse as well -- readinto() built on read() -- is
    # tempting when read() is the more natural primitive, but a subclass
    # implementing neither would then recurse without end.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None or n < 0:
            # No usable size given: read everything up to EOF.
            return self.readall()
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(buf[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        chunk = self.read(DEFAULT_BUFFER_SIZE)
        while chunk:
            collected += chunk
            chunk = self.read(DEFAULT_BUFFER_SIZE)
        # With nothing collected, pass the falsy terminator through
        # unchanged: it is either b'' or None.
        return bytes(collected) if collected else chunk

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
590
591io.RawIOBase.register(RawIOBase)
592from _io import FileIO
593RawIOBase.register(FileIO)
594
595
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        b may be any writable object supporting the buffer protocol
        (bytearray, array.array, a writable memoryview, ...); its size is
        measured in bytes regardless of its item type.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # View the target as a flat sequence of bytes so that the slice
        # assignment below works whatever the item type of b is.
        if not isinstance(b, memoryview):
            b = memoryview(b)
        b = b.cast('B')

        data = self.read(len(b))
        n = len(data)
        b[:n] = data
        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
681
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682io.BufferedIOBase.register(BufferedIOBase)
683
684
685class _BufferedIOMixin(BufferedIOBase):
686
687 """A mixin implementation of BufferedIOBase with an underlying raw stream.
688
689 This passes most requests on to the underlying raw stream. It
690 does *not* provide implementations of read(), readinto() or
691 write().
692 """
693
694 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000695 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696
697 ### Positioning ###
698
699 def seek(self, pos, whence=0):
700 new_position = self.raw.seek(pos, whence)
701 if new_position < 0:
702 raise IOError("seek() returned an invalid position")
703 return new_position
704
705 def tell(self):
706 pos = self.raw.tell()
707 if pos < 0:
708 raise IOError("tell() returned an invalid position")
709 return pos
710
711 def truncate(self, pos=None):
712 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
713 # and a flush may be necessary to synch both views of the current
714 # file state.
715 self.flush()
716
717 if pos is None:
718 pos = self.tell()
719 # XXX: Should seek() be used, instead of passing the position
720 # XXX directly to truncate?
721 return self.raw.truncate(pos)
722
723 ### Flush and close ###
724
725 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000726 if self.closed:
727 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728 self.raw.flush()
729
730 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000731 if self.raw is not None and not self.closed:
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +0100732 try:
733 # may raise BlockingIOError or BrokenPipeError etc
734 self.flush()
735 finally:
736 self.raw.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000738 def detach(self):
739 if self.raw is None:
740 raise ValueError("raw stream already detached")
741 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000742 raw = self._raw
743 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000744 return raw
745
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746 ### Inquiries ###
747
748 def seekable(self):
749 return self.raw.seekable()
750
751 def readable(self):
752 return self.raw.readable()
753
754 def writable(self):
755 return self.raw.writable()
756
757 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000758 def raw(self):
759 return self._raw
760
761 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000762 def closed(self):
763 return self.raw.closed
764
765 @property
766 def name(self):
767 return self.raw.name
768
769 @property
770 def mode(self):
771 return self.raw.mode
772
Antoine Pitrou243757e2010-11-05 21:15:39 +0000773 def __getstate__(self):
774 raise TypeError("can not serialize a '{0}' object"
775 .format(self.__class__.__name__))
776
Antoine Pitrou716c4442009-05-23 19:04:03 +0000777 def __repr__(self):
778 clsname = self.__class__.__name__
779 try:
780 name = self.name
781 except AttributeError:
782 return "<_pyio.{0}>".format(clsname)
783 else:
784 return "<_pyio.{0} name={1!r}>".format(clsname, name)
785
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000786 ### Lower-level APIs ###
787
788 def fileno(self):
789 return self.raw.fileno()
790
791 def isatty(self):
792 return self.raw.isatty()
793
794
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The data lives in a bytearray (self._buffer).  self._pos is the current
    stream position; after a seek it may point past the end of the buffer,
    in which case the next write pads the gap with null bytes.
    """

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; refuse on a closed stream."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        A value of None or a negative n reads everything up to the end of
        the buffer.  Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Raises ValueError on a closed
        stream and TypeError if b is a str or does not support the buffer
        protocol.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # len(b) counts items, not bytes, for buffers with multi-byte
        # items (e.g. array.array('i')); nbytes is the real size.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence is 0 (absolute), 1 (relative to the current position) or
        2 (relative to the end).  Relative seeks are clamped at 0.
        Raises TypeError if pos has no __index__, ValueError for a
        negative absolute position or an unsupported whence.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Normalise to a real int so that _pos never holds a foreign object.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The stream position itself is not changed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            # Normalise to a real int before comparing and slicing.
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; raises ValueError on a closed stream."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; raises ValueError on a closed stream."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; raises ValueError on a closed stream."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
913
914
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller holds self._read_lock.

        Returns bytes, or whatever "no data" value (b"" or None) the raw
        stream last reported when nothing at all could be read.
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except InterruptedError:
                    # Interrupted raw read: simply retry.
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except InterruptedError:
                # Interrupted raw read: simply retry.
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller holds self._read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top the buffer up to buffer_size with one successful raw read.
            to_read = self.buffer_size - have
            while True:
                try:
                    current = self.raw.read(to_read)
                except InterruptedError:
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop buffered data, return the new position.

        Raises ValueError if whence is not one of valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate for that.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1065
class BufferedWriter(_BufferedIOMixin):

    """Write buffering layered over a writable, sequential RawIO object.

    Data is collected in an internal bytearray and handed to the raw
    stream once more than buffer_size bytes have accumulated.  When no
    buffer_size is given, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return how many of its bytes were accepted.

        Raises BlockingIOError, carrying the number of bytes accepted,
        when the raw stream would block and the buffer had to be cut
        back to buffer_size.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still above buffer_size: accept only part of b and
                    # trim the buffer back to the limit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw tell())."""
        with self._write_lock:
            self._flush_unlocked()
            where = self.raw.tell() if pos is None else pos
            return self.raw.truncate(where)

    def flush(self):
        """Push all buffered bytes to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller holds self._write_lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except InterruptedError:
                # Interrupted raw write: retry with the same data.
                continue
            except BlockingIOError:
                raise RuntimeError(
                    "self.raw should implement RawIOBase: "
                    "it should not raise BlockingIOError")
            if count is None:
                # The raw stream signalled "would block".
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count < 0 or count > len(self._write_buf):
                raise IOError("write() returned incorrect number of bytes")
            del self._write_buf[:count]

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return len(self._write_buf) + _BufferedIOMixin.tell(self)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1153
1154
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  Raises IOError if reader
        is not readable or writer is not writable.
        """
        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything (-1)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both sides, writer first.

        The reader is closed even if closing the writer raises (for
        example because the final flush fails); the writer's exception
        then propagates to the caller.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side is a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state, as reported by the writer side."""
        return self.writer.closed
1221
1222
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.

    Read and write buffering share one raw stream, so every read-type
    call flushes pending writes first, and every write first rewinds the
    raw stream over any unread read-ahead.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Check that raw is seekable, then run both base initialisers."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, drop read-ahead, seek the raw stream.

        Returns the new position.  Raises ValueError for a whence not in
        valid_seek_flags and IOError if the raw seek reports a negative
        position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                # Move the raw stream back by the number of buffered but
                # not yet consumed bytes (a non-positive relative offset).
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        """Return the logical position.

        Uses the writer's view while write data is pending, otherwise
        the reader's view.
        """
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        # Delegate to the writer's truncate() with an explicit position.
        # NOTE(review): no seek() is issued here and the read buffer is
        # not reset by this method -- confirm that is intended.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read like BufferedReader.read()."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                # Rewind the raw stream over the unread buffered bytes so
                # the write lands at the logical position.
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1291
1292
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.

    The I/O methods below are placeholders that only call
    self._unsupported(); concrete subclasses override them.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.

        Returns a string.
        """
        # NOTE(review): _unsupported() is defined outside this excerpt;
        # presumably it raises UnsupportedOperation -- confirm.
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override.

        This base implementation returns None.
        """
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.  This base implementation returns None.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override.  This base implementation returns None.
        """
        return None
1357
# Register the pure-Python class as a virtual subclass of the io module's
# ABC.  As the note near the top of this file explains, real inheritance is
# avoided so that the C implementations are not inherited.
io.TextIOBase.register(TextIOBase)
1359
1360
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder that adds universal-newline handling.

    Wraps another incremental decoder (or None, in which case the input
    is taken to be already-decoded text).  With translate=True, \r\n and
    \r are rewritten to \n.  In either mode the kinds of newline seen
    are recorded, and a trailing \r is held back until the next call so
    that a \r\n pair is never split across two results.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input and return the text with newline handling applied."""
        # Decode the input (the \r held back by a previous call, if any,
        # is put back in front below).
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffered_input, flags); bit 0 of flags is pendingcr."""
        if self.decoder is None:
            pending_bytes, packed = b"", 0
        else:
            pending_bytes, packed = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        packed <<= 1
        if self.pendingcr:
            packed |= 1
        return pending_bytes, packed

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        pending_bytes, packed = state
        self.pendingcr = (packed & 1) == 1
        inner = self.decoder
        if inner is not None:
            inner.setstate((pending_bytes, packed >> 1))

    def reset(self):
        """Forget seen newlines and any held-back \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        inner = self.decoder
        if inner is not None:
            inner.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the _LF | _CR | _CRLF bit mask in self.seennl.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1444
1445
1446class TextIOWrapper(TextIOBase):
1447
1448 r"""Character and line based layer over a BufferedIOBase object, buffer.
1449
1450 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001451 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001452
1453 errors determines the strictness of encoding and decoding (see the
1454 codecs.register) and defaults to "strict".
1455
1456 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1457 handling of line endings. If it is None, universal newlines is
1458 enabled. With this enabled, on input, the lines endings '\n', '\r',
1459 or '\r\n' are translated to '\n' before being returned to the
1460 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001461 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001462 legal values, that newline becomes the newline when the file is read
1463 and it is returned untranslated. On output, '\n' is converted to the
1464 newline.
1465
1466 If line_buffering is True, a call to flush is implied when a call to
1467 write contains a newline character.
1468 """
1469
1470 _CHUNK_SIZE = 2048
1471
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001472 # The write_through argument has no effect here since this
1473 # implementation always writes through. The argument is present only
1474 # so that the signature can match the signature of the C version.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001475 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001476 line_buffering=False, write_through=False):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001477 if newline is not None and not isinstance(newline, str):
1478 raise TypeError("illegal newline type: %r" % (type(newline),))
1479 if newline not in (None, "", "\n", "\r", "\r\n"):
1480 raise ValueError("illegal newline value: %r" % (newline,))
1481 if encoding is None:
1482 try:
1483 encoding = os.device_encoding(buffer.fileno())
1484 except (AttributeError, UnsupportedOperation):
1485 pass
1486 if encoding is None:
1487 try:
1488 import locale
1489 except ImportError:
1490 # Importing locale may fail if Python is being built
1491 encoding = "ascii"
1492 else:
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001493 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001494
1495 if not isinstance(encoding, str):
1496 raise ValueError("invalid encoding: %r" % encoding)
1497
Georg Brandl2fc8f772014-03-02 09:18:31 +01001498 if not codecs.lookup(encoding)._is_text_encoding:
1499 msg = ("%r is not a text encoding; "
1500 "use codecs.open() to handle arbitrary codecs")
1501 raise LookupError(msg % encoding)
1502
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001503 if errors is None:
1504 errors = "strict"
1505 else:
1506 if not isinstance(errors, str):
1507 raise ValueError("invalid errors: %r" % errors)
1508
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001509 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001510 self._line_buffering = line_buffering
1511 self._encoding = encoding
1512 self._errors = errors
1513 self._readuniversal = not newline
1514 self._readtranslate = newline is None
1515 self._readnl = newline
1516 self._writetranslate = newline != ''
1517 self._writenl = newline or os.linesep
1518 self._encoder = None
1519 self._decoder = None
1520 self._decoded_chars = '' # buffer for text returned from decoder
1521 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1522 self._snapshot = None # info for reconstructing decoder state
1523 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02001524 self._has_read1 = hasattr(self.buffer, 'read1')
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001525 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526
Antoine Pitroue4501852009-05-14 18:55:55 +00001527 if self._seekable and self.writable():
1528 position = self.buffer.tell()
1529 if position != 0:
1530 try:
1531 self._get_encoder().setstate(0)
1532 except LookupError:
1533 # Sometimes the encoder doesn't exist
1534 pass
1535
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1537 # where dec_flags is the second (integer) item of the decoder state
1538 # and next_input is the chunk of input bytes that comes next after the
1539 # snapshot point. We use this to reconstruct decoder states in tell().
1540
1541 # Naming convention:
1542 # - "bytes_..." for integer variables that count input bytes
1543 # - "chars_..." for integer variables that count decoded characters
1544
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001545 def __repr__(self):
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001546 result = "<_pyio.TextIOWrapper"
Antoine Pitrou716c4442009-05-23 19:04:03 +00001547 try:
1548 name = self.name
1549 except AttributeError:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001550 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001551 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001552 result += " name={0!r}".format(name)
1553 try:
1554 mode = self.mode
1555 except AttributeError:
1556 pass
1557 else:
1558 result += " mode={0!r}".format(mode)
1559 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001560
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 @property
1562 def encoding(self):
1563 return self._encoding
1564
1565 @property
1566 def errors(self):
1567 return self._errors
1568
1569 @property
1570 def line_buffering(self):
1571 return self._line_buffering
1572
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001573 @property
1574 def buffer(self):
1575 return self._buffer
1576
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577 def seekable(self):
Antoine Pitrou1d857452012-09-05 20:11:49 +02001578 if self.closed:
1579 raise ValueError("I/O operation on closed file.")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001580 return self._seekable
1581
def readable(self):
    """Report whether the underlying buffer says it is readable."""
    underlying = self.buffer
    return underlying.readable()
1584
def writable(self):
    """Report whether the underlying buffer says it is writable."""
    underlying = self.buffer
    return underlying.writable()
1587
def flush(self):
    """Flush the underlying buffer, then re-arm tell() support.

    ``_telling`` is reset to the seekable flag, which undoes the
    temporary disabling done by ``__next__``.
    """
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
1591
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer is None or the stream is already
    closed.  The buffer is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598
@property
def closed(self):
    """Whether the underlying buffer reports itself as closed."""
    underlying = self.buffer
    return underlying.closed
1602
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
1606
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    underlying = self.buffer
    return underlying.fileno()
1609
def isatty(self):
    """Return whatever the underlying buffer's isatty() returns."""
    underlying = self.buffer
    return underlying.isatty()
1612
def write(self, s):
    """Encode the str *s*, write it to the buffer, return ``len(s)``.

    The returned length is measured before any newline translation.
    Raises ValueError if the stream is closed and TypeError if *s* is
    not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    translating = self._writetranslate
    saw_newline = (translating or self._line_buffering) and "\n" in s
    if saw_newline and translating and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    # Any read-side state is stale after a write.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
1634
def _get_encoder(self):
    """Create an incremental encoder, store it on self, and return it."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = factory(self._errors)
    self._encoder = encoder
    return encoder
1639
def _get_decoder(self):
    """Create an incremental decoder, store it on self, and return it.

    When universal-newline reading is enabled, the codec's decoder is
    wrapped in an IncrementalNewlineDecoder.
    """
    built = codecs.getincrementaldecoder(self._encoding)(self._errors)
    if self._readuniversal:
        built = IncrementalNewlineDecoder(built, self._readtranslate)
    self._decoder = built
    return built
1647
1648 # The following three methods implement an ADT for _decoded_chars.
1649 # Text returned from the decoder is buffered here until the client
1650 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the decoded-text buffer and mark none of it as consumed."""
    self._decoded_chars_used = 0
    self._decoded_chars = chars
1655
def _get_decoded_chars(self, n=None):
    """Consume and return text from the decoded-text buffer.

    With *n* of None everything not yet consumed is returned; otherwise
    the slice of up to *n* characters from the current offset.
    """
    begin = self._decoded_chars_used
    end = None if n is None else begin + n
    taken = self._decoded_chars[begin:end]
    self._decoded_chars_used = begin + len(taken)
    return taken
1665
def _rewind_decoded_chars(self, n):
    """Un-consume the last *n* characters of the decoded-text buffer.

    Raises AssertionError if fewer than *n* characters were consumed.
    """
    consumed = self._decoded_chars_used
    if n > consumed:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = consumed - n
1671
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the underlying buffer.

    Returns True unless EOF was reached.  The decoded string replaces
    the previous contents of the decoded-text buffer.  The entire input
    chunk is sent to the decoder, though some of it may remain buffered
    in the decoder, yet to be converted.

    Raises ValueError if no decoder has been created yet.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    telling = self._telling
    if telling:
        # To prepare for tell(), snapshot a point in the file where the
        # decoder's input buffer is empty: that point lies
        # len(pending) bytes back, with decoder state (b'', flags).
        pending, flags = decoder.getstate()

    # Read a chunk, decode it, and publish the result.
    fetch = self.buffer.read1 if self._has_read1 else self.buffer.read
    raw = fetch(self._CHUNK_SIZE)
    at_eof = not raw
    text = decoder.decode(raw, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk; tell() uses it as a
    # first guess when searching for a start point.
    self._b2cratio = len(raw) / len(self._decoded_chars) if text else 0.0

    if telling:
        # At the snapshot point, len(pending) bytes before the read,
        # the next input to be decoded is pending + raw.
        self._snapshot = (flags, pending + raw)

    return not at_eof
1713
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the components of a tell() position into a single integer.

    The meaning of a tell() cookie is: seek to position, set the
    decoder flags to dec_flags, read bytes_to_feed bytes, feed them
    into the decoder with need_eof as the EOF flag, then skip
    chars_to_skip characters of the decoded result.  Each field is
    shifted into its own 64-bit slot; need_eof is stored as a single
    bit above the other four.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1723
def _unpack_cookie(self, bigint):
    """Split a tell() cookie back into its five components.

    Returns ``(position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip)``: four successive 64-bit fields taken from the low
    end of *bigint*, with need_eof being whatever remains above them.
    """
    mask = (1 << 64) - 1
    position = bigint & mask
    dec_flags = (bigint >> 64) & mask
    bytes_to_feed = (bigint >> 128) & mask
    chars_to_skip = (bigint >> 192) & mask
    need_eof = bigint >> 256
    return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1730
def tell(self):
    """Return an opaque integer cookie describing the current position.

    When there is no decoder or no snapshot, the cookie is simply the
    underlying buffer's position.  Otherwise the decoder is replayed
    from the last snapshot to find the nearest safe start point (one
    where the decoder has nothing buffered), and the cookie is built
    with _pack_cookie().  The decoder's state is restored before
    returning.

    Raises UnsupportedOperation if the underlying stream is not
    seekable, and IOError if telling is currently disabled or the
    logical position cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ran out without finding a start point (no break):
            # fall back to replaying from the snapshot itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # The replay above mutated the live decoder; put it back.
        decoder.setstate(saved_state)
1829
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at *pos*.

    When *pos* is None the value of tell() is used.  Returns whatever
    the underlying buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001835
def detach(self):
    """Flush, disconnect the underlying buffer, and return it.

    Raises ValueError if the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1843
def seek(self, cookie, whence=0):
    """Move the stream to the position described by *cookie*.

    *cookie* is an opaque value previously returned by tell() (see
    _pack_cookie for its layout).  *whence* may be:

    * 0 -- absolute: restore the position encoded in *cookie*;
    * 1 -- current-relative: only a zero *cookie* is supported, which
      re-syncs the underlying buffer with the current position;
    * 2 -- end-relative: only a zero *cookie* is supported.

    Returns the cookie that was seeked to (for whence 0 and 1) or the
    underlying buffer's new position (for whence 2).

    Raises ValueError if the stream is closed, *whence* is not one of
    the values above, or *cookie* is negative; UnsupportedOperation if
    the stream is not seekable or a nonzero relative seek is requested;
    IOError if the logical position cannot be restored.
    """
    def _reset_encoder(position):
        # Reset the encoder (merely useful for proper BOM handling):
        # at position 0 it is fully reset, anywhere else it is put in
        # state 0.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        # The encoder must be reset here too, exactly as for an
        # absolute seek; otherwise its state is left over from before
        # the seek and no longer matches the new position.
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
1914
def read(self, n=None):
    """Read and return up to *n* characters as a str.

    With *n* of None or negative, everything up to EOF is read and
    decoded.  Otherwise chunks are read until *n* characters are
    available or EOF is reached.  Raises TypeError if *n* has no
    ``__index__`` attribute.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err

    if n < 0:
        # Read everything.
        buffered = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return buffered + remainder

    # Keep reading chunks until we have n characters to return.
    collected = self._get_decoded_chars(n)
    while len(collected) < n:
        got_data = self._read_chunk()
        collected += self._get_decoded_chars(n - len(collected))
        if not got_data:
            break
    return collected
1939
def __next__(self):
    """Return the next line, raising StopIteration at end of input.

    tell() is disabled while iterating; it is re-enabled (for seekable
    streams) and the snapshot is dropped once the input is exhausted.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1948
def readline(self, limit=None):
    """Read and return one line as a str.

    The returned text ends with the line terminator when one was found.
    If *limit* is a non-negative integer, at most *limit* characters are
    returned; None (the default) is treated as -1, i.e. no limit.  At
    end of input whatever text has been gathered (possibly the empty
    string) is returned.

    Raises ValueError if the stream is closed and TypeError if *limit*
    is neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index in `line` from which the next newline search resumes, so
    # text that was already scanned is not scanned again.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal: search for the configured terminator.
            # NOTE: this search always starts at index 0, not `start`.
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data.
        # Keep reading until a chunk yields decoded text or EOF is hit.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2036
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
2040
2041
class StringIO(TextIOWrapper):
    """In-memory text stream.

    Implemented as a TextIOWrapper over a BytesIO using UTF-8 with the
    "surrogatepass" error handler.  *initial_value* provides the initial
    contents of the stream; *newline* has the same meaning as in
    TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream and optionally preload it.

        Raises TypeError if *initial_value* is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        text = str(initial_value)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the buffer as a str.

        The decoder's state is saved before decoding and restored
        afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved = decoder.getstate()
        decoder.reset()
        try:
            raw = self.buffer.getvalue()
            return decoder.decode(raw, final=True)
        finally:
            decoder.setstate(saved)

    def __repr__(self):
        """Return the default object repr."""
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None."""
        return None

    @property
    def encoding(self):
        """Always None."""
        return None

    def detach(self):
        """Not supported: delegates to ``self._unsupported("detach")``."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")