blob: 9cbb364dd1b74d412f64057dd4953d613912b205 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
# Whence values accepted by seek(): the three portable ones are hardwired,
# and the sparse-file extensions are added when the os module exposes them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000032
33
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.  With buffering=0 in binary mode the raw FileIO object
    itself is returned.

    If an error occurs after the raw file has been opened (for example an
    unusable buffering/mode combination), the partially built stream is
    closed before the exception propagates.

    Raises TypeError if an argument has the wrong type and ValueError if the
    mode string or the combination of arguments is invalid.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode string parsing ------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Build the stream stack ---------------------------------------
    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # `result` always names the outermost object built so far, so that a
    # failure at any later step can close the whole stack in one call.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Don't leak the already-opened file when construction fails.
        result.close()
        raise
227
228
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is computed on access: the call signature of
    open() followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ):
        # The signature line must list every parameter open() accepts,
        # including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
237
class OpenWrapper:
    """Wrapper for builtins.open

    "Instantiating" this class just forwards the call to open() and hands
    back whatever open() returns.  Unlike a plain function, a class stored
    as a class variable does not become a bound method (dbm.dumb stores
    open that way).

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **named):
        # cls is deliberately unused: the result is the stream from open(),
        # not an OpenWrapper instance.
        stream = open(*positional, **named)
        return stream
250
251
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object; a local stand-in is only defined when io does not provide one.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259
260
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations.

        name is the method name used in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-byte seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so subclasses cannot clobber it by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark as closed even when flush() fails.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if limit is neither None nor an int.
        """
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in one go, without overshooting the line.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap at the bytes still allowed, not at the whole
                    # limit, so several short reads cannot exceed it.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iteration yields lines.  Raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
529
530
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass that wants a
    # working read() only has to supply readinto() as its primitive.  In
    # general, readinto() can be more efficient than read().

    # (The reverse -- a default readinto() expressed through read() -- is
    # deliberately not provided: if a subclass implemented neither, the two
    # defaults would recurse into each other.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        A value of None or a negative n reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        size = -1 if n is None else n
        if size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            # Non-blocking stream with nothing available right now.
            return None
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() call."""
        collected = bytearray()
        chunk = self.read(DEFAULT_BUFFER_SIZE)
        while chunk:
            collected += chunk
            chunk = self.read(DEFAULT_BUFFER_SIZE)
        # With nothing collected, pass the terminating value through
        # unchanged: b'' for EOF or None for "no data available".
        return bytes(collected) if collected else chunk

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
594
595
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on readinto(); instead
    readinto() is built on read().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        payload = self.read(len(b))
        count = len(payload)
        try:
            b[:count] = payload
        except TypeError:
            # array.array targets reject a bytes slice assignment; retry
            # with the data converted to an array.  Any other target
            # type gets the original TypeError.
            import array
            if not isinstance(b, array.array):
                raise
            b[:count] = array.array('b', payload)
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
683
684
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember *raw* as the underlying stream (exposed as self.raw)."""
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the new position.

        Raises IOError if the raw stream reports a negative position.
        """
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Return the raw stream's position.

        Raises IOError if the raw stream reports a negative position.
        """
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream to *pos*.

        When *pos* is None the current position (self.tell()) is used.
        Returns whatever the raw stream's truncate() returns.
        """
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raise ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing when the raw stream has been detached or is already
        closed.  The raw stream is closed even if the flush raises.
        """
        if self.raw is not None and not self.closed:
            try:
                # may raise BlockingIOError or BrokenPipeError etc
                self.flush()
            finally:
                self.raw.close()

    def detach(self):
        """Flush, then disconnect and return the raw stream.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        """Return the raw stream's seekable() result."""
        return self.raw.seekable()

    def readable(self):
        """Return the raw stream's readable() result."""
        return self.raw.readable()

    def writable(self):
        """Return the raw stream's writable() result."""
        return self.raw.writable()

    @property
    def raw(self):
        """The underlying raw stream, or None once detached."""
        return self._raw

    @property
    def closed(self):
        """The raw stream's closed flag."""
        return self.raw.closed

    @property
    def name(self):
        """The raw stream's name attribute."""
        return self.raw.name

    @property
    def mode(self):
        """The raw stream's mode attribute."""
        return self.raw.mode

    def __getstate__(self):
        """Refuse pickling: always raises TypeError."""
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        """Return a short description, including the name when available."""
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno() result."""
        return self.raw.fileno()

    def isatty(self):
        """Return the raw stream's isatty() result."""
        return self.raw.isatty()
793
794
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with *initial_bytes*.

        The position starts at 0 regardless of the initial contents.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the file is closed, like the other accessors.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to *n* bytes from the current position.

        If *n* is None or negative, read everything up to the end of the
        buffer.  Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write *b* at the current position and return len(b).

        Raises ValueError on a closed file and TypeError for str input.
        Writing past the current end pads the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 sets an absolute position (negative values raise
        ValueError); whence 1 and 2 are relative to the current position
        and to the end of the buffer, clamped at 0.  Any other whence
        raises ValueError.  *pos* must provide __index__, else TypeError.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Actually convert, so index-like objects behave as integers below.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from *pos* onwards and return *pos*.

        *pos* defaults to the current position.  The position itself is
        not moved.  A negative *pos* raises ValueError; a *pos* without
        __index__ raises TypeError.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            # Actually convert, so index-like objects behave as integers.
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; ValueError if closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; ValueError if closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; ValueError if closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
913
914
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if *raw* is not readable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard any buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold self._read_lock.

        Returns bytes, or the "no data" value reported by the raw stream
        (b"" or None) when nothing at all could be returned.
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except InterruptedError:
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except InterruptedError:
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold self._read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            # Retry the single raw read only if it was interrupted.
            while True:
                try:
                    current = self.raw.read(to_read)
                except InterruptedError:
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the position.

        For a relative seek (whence == 1) the offset is corrected for the
        bytes that were read ahead but not yet consumed.  Raises
        ValueError for a whence value not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1065
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the writable raw stream *raw*.

        Raises IOError if *raw* is not writable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append *b* to the write buffer, flushing when it overflows.

        Returns the number of bytes accepted.  Raises ValueError on a
        closed file and TypeError for str input.  If flushing would block
        and the buffer exceeds buffer_size, the buffer is cut back and
        BlockingIOError is raised carrying the number of bytes accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream to *pos*.

        *pos* defaults to the raw stream's current position.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold self._write_lock.

        Raises ValueError on a closed file, BlockingIOError when the raw
        stream returns None from write(), RuntimeError when the raw
        stream itself raises BlockingIOError, and IOError when the raw
        stream reports an impossible byte count.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except InterruptedError:
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if n is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n > len(self._write_buf) or n < 0:
                raise IOError("write() returned incorrect number of bytes")
            # Drop only what the raw stream accepted; loop for the rest.
            del self._write_buf[:n]

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError for a whence value not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1153
1154
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.

        Raises IOError if *reader* is not readable or *writer* is not
        writable.
        """
        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1 (read all)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Return the reader's readable() result."""
        return self.reader.readable()

    def writable(self):
        """Return the writer's writable() result."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises, so that a
        failed flush on the write side does not leak the read side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The writer's closed flag."""
        return self.writer.closed
1221
1222
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Initialise both the reader and the writer state over *raw*.

        raw._checkSeekable() is called first, before either side is set up.
        """
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, then seek the raw stream.

        Returns the new position.  Raises ValueError for a whence value
        not in valid_seek_flags and IOError if the raw stream reports a
        negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        """Return the logical position.

        Uses the writer's view while data is pending in the write buffer,
        otherwise the reader's view.
        """
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate to *pos* (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        # Delegates to BufferedWriter.truncate(), which flushes pending
        # writes first.  No seek is performed here.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read; None is treated as -1."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1291
1292
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.

    The methods below only report the operation through
    self._unsupported(); the properties return None.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and returning an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None
1357
# Register the pure-Python class with the ABC exposed by the io module.
io.TextIOBase.register(TextIOBase)
1359
1360
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap *decoder* (or None to pass already-decoded text through).

        *translate* selects whether \\r\\n and \\r are rewritten to \\n.
        """
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input* and return text with newlines handled.

        A trailing \\r is held back (unless *final*) so that a \\r\\n pair
        split across two calls is still seen as one newline.
        """
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= ((lf and self._LF) | (cr and self._CR)
                        | (crlf and self._CRLF))

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        """Return (buffered_bytes, flags).

        The wrapped decoder's flags are shifted left by one bit; the low
        bit records whether a \\r is being held back.
        """
        if self.decoder is None:
            buf = b""
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far.

        None if none were seen, a single string for one kind, or a tuple
        of strings for several.  The tuple below is indexed by the
        _LF/_CR/_CRLF bit mask in self.seennl.
        """
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]
1444
1445
1446class TextIOWrapper(TextIOBase):
1447
1448 r"""Character and line based layer over a BufferedIOBase object, buffer.
1449
1450 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001451 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001452
1453 errors determines the strictness of encoding and decoding (see the
1454 codecs.register) and defaults to "strict".
1455
1456 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1457 handling of line endings. If it is None, universal newlines is
1458 enabled. With this enabled, on input, the lines endings '\n', '\r',
1459 or '\r\n' are translated to '\n' before being returned to the
1460 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001461 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001462 legal values, that newline becomes the newline when the file is read
1463 and it is returned untranslated. On output, '\n' is converted to the
1464 newline.
1465
1466 If line_buffering is True, a call to flush is implied when a call to
1467 write contains a newline character.
1468 """
1469
1470 _CHUNK_SIZE = 2048
1471
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001472 # The write_through argument has no effect here since this
1473 # implementation always writes through. The argument is present only
1474 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Wrap the binary stream *buffer* in a text layer.

    encoding: codec name.  When None, os.device_encoding() of the
        buffer's file descriptor is tried first, then
        locale.getpreferredencoding(False), falling back to "ascii" if
        locale cannot be imported.
    errors: codec error handler name; defaults to "strict".
    newline: None, '', '\\n', '\\r' or '\\r\\n'.
    line_buffering: stored and consulted by write().
    write_through: accepted for signature compatibility; unused here.

    Raises TypeError for a non-str newline and ValueError for an illegal
    newline value or a non-str encoding/errors.
    """
    if newline is not None and not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        # Wrap in a 1-tuple so a tuple argument is formatted, not unpacked.
        raise ValueError("invalid encoding: %r" % (encoding,))

    if errors is None:
        errors = "strict"
    else:
        if not isinstance(errors, str):
            raise ValueError("invalid errors: %r" % (errors,))

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._b2cratio = 0.0

    if self._seekable and self.writable():
        position = self.buffer.tell()
        if position != 0:
            # Not at the start of the stream: put the encoder in state 0.
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
1530
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1532 # where dec_flags is the second (integer) item of the decoder state
1533 # and next_input is the chunk of input bytes that comes next after the
1534 # snapshot point. We use this to reconstruct decoder states in tell().
1535
1536 # Naming convention:
1537 # - "bytes_..." for integer variables that count input bytes
1538 # - "chars_..." for integer variables that count decoded characters
1539
def __repr__(self):
    """Return a description with name and mode (when available) and encoding."""
    result = "<_pyio.TextIOWrapper"
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        result += " name={0!r}".format(name)
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        result += " mode={0!r}".format(mode)
    return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001555
@property
def encoding(self):
    """The encoding name stored by the constructor."""
    return self._encoding
1559
@property
def errors(self):
    """The error-handler name stored by the constructor."""
    return self._errors
1563
@property
def line_buffering(self):
    """The line_buffering flag stored by the constructor."""
    return self._line_buffering
1567
@property
def buffer(self):
    """The underlying binary stream passed to the constructor."""
    return self._buffer
1571
def seekable(self):
    """Return the cached seekable flag; ValueError if the file is closed."""
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    return self._seekable
1576
def readable(self):
    """Return the underlying buffer's readable() result."""
    return self.buffer.readable()
1579
def writable(self):
    """Return the underlying buffer's writable() result."""
    return self.buffer.writable()
1582
def flush(self):
    """Flush the underlying buffer and reset _telling to _seekable."""
    self.buffer.flush()
    self._telling = self._seekable
1586
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer is None or the stream is already closed.
    The buffer is closed even if the flush raises.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593
@property
def closed(self):
    """Whether the underlying buffer reports itself as closed."""
    return self.buffer.closed
1597
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    return self.buffer.name
1601
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    return self.buffer.fileno()
1604
def isatty(self):
    """Return whatever the underlying buffer's isatty() returns."""
    return self.buffer.isatty()
1607
def write(self, s):
    """Write data, where s is a str.

    The text is newline-translated (when enabled), encoded, and handed
    to the underlying buffer.  Returns the length of ``s`` as passed in,
    i.e. before any newline translation.

    Raises ValueError if the stream is closed and TypeError if ``s`` is
    not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    translate = self._writetranslate
    saw_newline = (translate or self._line_buffering) and "\n" in s
    if saw_newline and translate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder
    if not encoder:
        encoder = self._get_encoder()
    # XXX What if we were just reading?
    self.buffer.write(encoder.encode(s))
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    # Any previously taken read snapshot is invalidated by the write.
    self._snapshot = None
    decoder = self._decoder
    if decoder:
        decoder.reset()
    return char_count
1629
1630 def _get_encoder(self):
1631 make_encoder = codecs.getincrementalencoder(self._encoding)
1632 self._encoder = make_encoder(self._errors)
1633 return self._encoder
1634
1635 def _get_decoder(self):
1636 make_decoder = codecs.getincrementaldecoder(self._encoding)
1637 decoder = make_decoder(self._errors)
1638 if self._readuniversal:
1639 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1640 self._decoder = decoder
1641 return decoder
1642
1643 # The following three methods implement an ADT for _decoded_chars.
1644 # Text returned from the decoder is buffered here until the client
1645 # requests it by calling our read() or readline() method.
1646 def _set_decoded_chars(self, chars):
1647 """Set the _decoded_chars buffer."""
1648 self._decoded_chars = chars
1649 self._decoded_chars_used = 0
1650
1651 def _get_decoded_chars(self, n=None):
1652 """Advance into the _decoded_chars buffer."""
1653 offset = self._decoded_chars_used
1654 if n is None:
1655 chars = self._decoded_chars[offset:]
1656 else:
1657 chars = self._decoded_chars[offset:offset + n]
1658 self._decoded_chars_used += len(chars)
1659 return chars
1660
1661 def _rewind_decoded_chars(self, n):
1662 """Rewind the _decoded_chars buffer."""
1663 if self._decoded_chars_used < n:
1664 raise AssertionError("rewind decoded_chars out of bounds")
1665 self._decoded_chars_used -= n
1666
1667 def _read_chunk(self):
1668 """
1669 Read and decode the next chunk of data from the BufferedReader.
1670 """
1671
1672 # The return value is True unless EOF was reached. The decoded
1673 # string is placed in self._decoded_chars (replacing its previous
1674 # value). The entire input chunk is sent to the decoder, though
1675 # some of it may remain buffered in the decoder, yet to be
1676 # converted.
1677
1678 if self._decoder is None:
1679 raise ValueError("no decoder")
1680
1681 if self._telling:
1682 # To prepare for tell(), we need to snapshot a point in the
1683 # file where the decoder's input buffer is empty.
1684
1685 dec_buffer, dec_flags = self._decoder.getstate()
1686 # Given this, we know there was a valid snapshot point
1687 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1688
1689 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02001690 if self._has_read1:
1691 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1692 else:
1693 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001695 decoded_chars = self._decoder.decode(input_chunk, eof)
1696 self._set_decoded_chars(decoded_chars)
1697 if decoded_chars:
1698 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
1699 else:
1700 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701
1702 if self._telling:
1703 # At the snapshot point, len(dec_buffer) bytes before the read,
1704 # the next input to be decoded is dec_buffer + input_chunk.
1705 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1706
1707 return not eof
1708
1709 def _pack_cookie(self, position, dec_flags=0,
1710 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1711 # The meaning of a tell() cookie is: seek to position, set the
1712 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1713 # into the decoder with need_eof as the EOF flag, then skip
1714 # chars_to_skip characters of the decoded result. For most simple
1715 # decoders, tell() will often just give a byte offset in the file.
1716 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1717 (chars_to_skip<<192) | bool(need_eof)<<256)
1718
1719 def _unpack_cookie(self, bigint):
1720 rest, position = divmod(bigint, 1<<64)
1721 rest, dec_flags = divmod(rest, 1<<64)
1722 rest, bytes_to_feed = divmod(rest, 1<<64)
1723 need_eof, chars_to_skip = divmod(rest, 1<<64)
1724 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1725
def tell(self):
    """Return the current stream position as an integer cookie.

    When there is no decoder or no snapshot, the cookie is simply the
    underlying buffer's position.  Otherwise the cookie is built with
    _pack_cookie() and encodes a safe start point plus what is needed to
    replay the decoder from there (see _pack_cookie).

    Raises UnsupportedOperation if the stream is not seekable, and
    IOError if telling is disabled (during iteration via next()) or the
    logical position cannot be reconstructed.  The decoder's state is
    restored before returning from the replay path.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ran out without finding a start point: fall back
            # to replaying from the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # The replay above mutated the decoder; put it back as it was.
        decoder.setstate(saved_state)
1824
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer.

    ``pos`` defaults to the current position as reported by tell().
    Returns whatever the buffer's truncate() returns.
    """
    self.flush()
    size = self.tell() if pos is None else pos
    return self.buffer.truncate(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001830
def detach(self):
    """Flush, disconnect and return the underlying buffer.

    Afterwards ``_buffer`` is None.  Raises ValueError when the buffer
    has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1838
def seek(self, cookie, whence=0):
    """Move the stream to the position described by ``cookie``.

    ``whence`` selects the interpretation:

    * 0 -- ``cookie`` is a value previously returned by tell() (or 0).
    * 1 -- only ``cookie == 0`` is allowed; re-syncs the underlying
      buffer with the current logical position.
    * 2 -- only ``cookie == 0`` is allowed; moves to the end of the
      stream.

    Returns the new position (the cookie, or the byte position of the
    end of the stream for end-relative seeks).

    Raises ValueError on a closed file, an unsupported ``whence`` or a
    negative cookie; UnsupportedOperation if the stream is not seekable
    or a nonzero relative offset is requested; IOError if the logical
    position cannot be restored.
    """
    def _reset_encoder(position):
        # Reset the encoder (merely useful for proper BOM handling):
        # at position 0 it goes back to its initial state, anywhere
        # else it is put in state 0.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1: # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2: # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        # Keep the encoder consistent with the new position, exactly as
        # absolute seeks do; otherwise a write after seek(0, 2) on a
        # non-empty stream could emit a BOM in the middle of the data.
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder (merely useful for proper BOM handling)
    _reset_encoder(cookie)
    return cookie
1909
def read(self, n=None):
    """Read and return at most ``n`` characters as a str.

    With ``n`` of None or negative, everything up to EOF is read and
    decoded.  Otherwise chunks are read until ``n`` characters are
    available or EOF is hit.

    Raises TypeError if ``n`` has no ``__index__`` attribute.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err

    if n < 0:
        # Read everything: buffered text first, then the rest of the
        # underlying stream decoded with final=True.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return pending + remainder

    # Keep reading chunks until we have n characters to return.
    text = self._get_decoded_chars(n)
    while len(text) < n:
        more_data = self._read_chunk()
        text += self._get_decoded_chars(n - len(text))
        if not more_data:
            break
    return text
1934
1935 def __next__(self):
1936 self._telling = False
1937 line = self.readline()
1938 if not line:
1939 self._snapshot = None
1940 self._telling = self._seekable
1941 raise StopIteration
1942 return line
1943
def readline(self, limit=None):
    """Read and return one line, including its line ending if present.

    ``limit``, when given and non-negative, caps the number of
    characters returned.  How line endings are recognised depends on
    ``_readtranslate``, ``_readuniversal`` and ``_readnl``.  At end of
    file, whatever text has been accumulated (possibly '') is returned.

    Raises ValueError if the stream is closed and TypeError if
    ``limit`` is neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index in `line` from which the next newline search starts.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # NOTE: this search always starts at index 0; `start` is
            # not used (or updated) in this branch.
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2031
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None when there is no decoder."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
2035
2036
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Wrap a fresh BytesIO as UTF-8 text and optionally preload it.

        Raises TypeError if initial_value is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        self.write(str(initial_value))
        self.seek(0)

    def getvalue(self):
        """Flush, then return the whole buffer contents decoded to str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported via _unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")