blob: fa77ec17f5f9a09ed09f69e9298de9142c5283b9 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
# Whence values: 0, 1 and 2 are always present; the platform-specific
# SEEK_HOLE / SEEK_DATA constants are added when the os module has them.
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind for compatibility: makes the builtin available as a module attribute.
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000032
33
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines mode works (it
    only applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.
    It works as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor
    will be kept open when the file is closed. This does not work when a file
    name is given and must be True in that case.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation (nothing has been opened yet) ---
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing: reject unknown or repeated mode characters ---
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # From here on a file is open.  'result' always names the outermost
    # object built so far, so that a failure in any later step can close
    # the whole stack instead of leaking the descriptor.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size: keep DEFAULT_BUFFER_SIZE.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except BaseException:
        # Always re-raised below; the handler exists only to release the
        # file that was opened above.
        result.close()
        raise
227
228
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor that builds its value on every access: a one-line signature
    for open() followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ=None):
        # open.__doc__ is None when docstrings are stripped (-OO); fall back
        # to an empty string so that attribute access still succeeds.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            (open.__doc__ or ""))
237
class OpenWrapper:
    """Wrapper for builtins.open

    A class is used instead of the bare function so that open does not
    turn into a bound method when it is stored as a class variable (as
    dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the docstring above with one computed from open() itself.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper just forwards everything to open()
        # and returns whatever open() returns.
        return open(*args, **kwargs)
250
251
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  A local stand-in is only defined when the io module does
# not provide the attribute.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259
260
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-byte seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() fails; otherwise
                # every later close() (including the one from __del__)
                # would retry the failing flush.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None or a negative value means no limit.
        Raises TypeError for any other type.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        if hasattr(self, "peek"):
            def nreadahead():
                # Use peek() to read up to the next newline in one go
                # without consuming anything past it.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap by what is still allowed, not by the total limit,
                    # so that several chunks can never add up to more than
                    # limit bytes.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol.  Returns self; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No separator is added between items.  Raises ValueError if the
        stream is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
525
# Register the class above with the "official" ABC from io.py instead of
# inheriting from it (see the NOTE near the top of this module).
io.IOBase.register(IOBase)
527
528
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass that wants a
    # working read() only has to supply readinto() as its primitive.
    # In general, readinto() can be more efficient than read().

    # (Providing the reverse as well -- readinto() written in terms of
    # read() -- is tempting when read() is the more natural primitive, but
    # a subclass implementing neither would then recurse forever.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        A value of None or a negative n reads until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        size = -1 if n is None else n
        if size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            collected += data
        # With nothing collected, hand back the last read() result as is:
        # b'' for EOF or None for "no data available right now".
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
588
# Register the pure-Python class with the "official" ABC from io.py.
io.RawIOBase.register(RawIOBase)
# FileIO is taken from the C _io module rather than defined here; it is
# registered so that it counts as a subclass of this module's RawIOBase.
from _io import FileIO
RawIOBase.register(FileIO)
592
593
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into the writable buffer b.

        b may be any object exposing a writable, contiguous buffer
        (bytearray, memoryview, array.array of any type code, ...); for
        such objects the capacity is counted in bytes.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment, and TypeError if b cannot be written to.
        """
        try:
            # A flat unsigned-byte view makes the slice assignment below
            # work for every buffer type, whatever its item format.
            target = memoryview(b).cast('B')
        except TypeError:
            # Not a (contiguous) buffer: fall back to plain slice
            # assignment on the object itself.
            target = b
        data = self.read(len(target))
        n = len(data)
        target[:n] = data
        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
679
# Register the pure-Python class with the "official" ABC from io.py.
io.BufferedIOBase.register(BufferedIOBase)
681
682
class _BufferedIOMixin(BufferedIOBase):

    """Mixin that implements BufferedIOBase on top of one raw stream.

    Most requests are simply forwarded to the wrapped raw stream.  The
    mixin does *not* implement read(), readinto() or write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the position it reports."""
        reported = self.raw.seek(pos, whence)
        if reported < 0:
            raise IOError("seek() returned an invalid position")
        return reported

    def tell(self):
        """Return the raw stream's current position."""
        reported = self.raw.tell()
        if reported < 0:
            raise IOError("tell() returned an invalid position")
        return reported

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Buffered and lower-level I/O are being mixed here, so a flush
        # may be needed to bring both views of the file state in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; refuse to work on a closed file."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush failed.
            self.raw.close()

    def detach(self):
        """Flush, drop the reference to the raw stream and return it."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        """Forward to the raw stream's seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to the raw stream's readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to the raw stream's writable()."""
        return self.raw.writable()

    @property
    def raw(self):
        """The wrapped raw stream (None once detached)."""
        return self._raw

    @property
    def closed(self):
        """The raw stream's closed flag."""
        return self.raw.closed

    @property
    def name(self):
        """The raw stream's name attribute."""
        return self.raw.name

    @property
    def mode(self):
        """The raw stream's mode attribute."""
        return self.raw.mode

    def __getstate__(self):
        """Always raise TypeError; these objects are not serialized."""
        message = "can not serialize a '{0}' object"
        raise TypeError(message.format(self.__class__.__name__))

    def __repr__(self):
        """Describe the object, adding the name when one is available."""
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to the raw stream's fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to the raw stream's isatty()."""
        return self.raw.isatty()
791
792
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict for pickling."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError when the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read up to n bytes; None or a negative n reads to the end."""
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n=-1):
        """This is the same as read.

        n defaults to -1 (read everything), matching read().
        """
        return self.read(n)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of *bytes* written.  Raises ValueError on a
        closed stream and TypeError for str or non bytes-like input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # len(b) counts items, not bytes; for buffers with multi-byte
        # items (e.g. array('i')) the position would be advanced by the
        # wrong amount, so measure the byte size through the buffer API.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        pos may be any object implementing __index__.  whence is 0
        (start), 1 (current position) or 2 (end of buffer).
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Normalise to a real int so the comparisons and arithmetic below
        # also work for index-like objects.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position); return pos."""
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            # Same normalisation as in seek().
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; raise ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; raise ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; raise ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
911
912
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor wraps the given readable raw stream in a
    BufferedReader using buffer_size.  When buffer_size is left out,
    DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the readable raw IO object raw in a new buffered reader.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Forget all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Exactly n bytes are returned unless the underlying raw IO
        stream hits EOF or the call would block in non-blocking mode.
        With n None or -1, read until EOF or until read() would block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller holds the read lock."""
        empty_values = (b"", None)
        nodata_val = b""
        buffered = self._read_buf
        start = self._read_pos

        # Unbounded read: hand back what is buffered plus the rest of
        # the raw stream.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                rest = self.raw.readall()
                if rest is None:
                    return buffered[start:] or None
                return buffered[start:] + rest
            pieces = [buffered[start:]]  # Drop the already consumed bytes.
            while True:
                # Keep reading until EOF or until read() would block.
                try:
                    piece = self.raw.read()
                except InterruptedError:
                    continue
                if piece in empty_values:
                    nodata_val = piece
                    break
                pieces.append(piece)
            return b"".join(pieces) or nodata_val

        # Bounded read: return at most n bytes.
        avail = len(buffered) - start  # Amount of buffered data left.
        if n <= avail:
            # Fast path: everything requested is already buffered.
            self._read_pos += n
            return buffered[start:start + n]
        # Slow path: pull from the raw stream until enough bytes are
        # gathered, EOF is reached, or read() would block.
        pieces = [buffered[start:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                piece = self.raw.read(wanted)
            except InterruptedError:
                continue
            if piece in empty_values:
                nodata_val = piece
                break
            avail += len(piece)
            pieces.append(piece)
        # n exceeds avail only after an EOF or when read() would have
        # blocked.
        n = min(n, avail)
        joined = b"".join(pieces)
        self._read_buf = joined[n:]  # Keep the surplus for later reads.
        self._read_pos = 0
        if joined:
            return joined[:n]
        return nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        n is the desired minimal number of bytes; at most one raw read
        is made to satisfy it, and n is capped at self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller holds the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have >= want and have > 0:
            return self._read_buf[self._read_pos:]
        to_read = self.buffer_size - have
        while True:
            try:
                current = self.raw.read(to_read)
            except InterruptedError:
                continue
            break
        if current:
            self._read_buf = self._read_buf[self._read_pos:] + current
            self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # When at least one byte is buffered, only buffered bytes are
        # returned.  Otherwise a single raw read is performed.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            buffered = len(self._read_buf) - self._read_pos
            return self._read_unlocked(min(n, buffered))

    def tell(self):
        """Return the logical position, discounting unread buffered data."""
        raw_pos = _BufferedIOMixin.tell(self)
        return raw_pos - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read buffer."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of data that is buffered but not yet consumed.
                unread = len(self._read_buf) - self._read_pos
                pos -= unread
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1063
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream in a
    BufferedWriter.  When buffer_size is left out it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer and return the bytes accepted.

        The buffer is flushed to the raw stream once it grows beyond
        buffer_size.  A BlockingIOError carrying the number of accepted
        bytes is raised when that flush blocks with an over-full buffer.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # The buffer is full, so pre-flush it.  (This may raise
                # BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    excess = len(self._write_buf) - self.buffer_size
                    if excess > 0:
                        # buffer_size has been hit: settle for a partial
                        # write and cut the buffer back.
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos."""
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller holds the write lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except InterruptedError:
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if n is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n > len(self._write_buf) or n < 0:
                raise IOError("write() returned incorrect number of bytes")
            # Drop the bytes the raw stream accepted and go round again.
            del self._write_buf[:n]

    def tell(self):
        """Return the raw position plus the amount of unflushed data."""
        pending = len(self._write_buf)
        return _BufferedIOMixin.tell(self) + pending

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1151
1152
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  IOError is raised when
        reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Forward to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Forward to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Forward to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Forward to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Forward to the reader's readable()."""
        return self.reader.readable()

    def writable(self):
        """Forward to the writer's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even when closing the writer raises, so a
        failure on the write side cannot leak the read side.  The
        writer's exception still propagates to the caller.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer side."""
        return self.writer.closed
1219
1220
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor sets up both a reader and a writer over the
    seekable stream raw given as first argument.  When buffer_size is
    left out it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def _undo_readahead(self):
        """Undo the read-ahead (seek back) and reset the read buffer."""
        with self._read_lock:
            self.raw.seek(self._read_pos - len(self._read_buf), 1)
            self._reset_read_buf()

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead and seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data
        # forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Report the position from whichever side holds buffered data."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: tell()) through the writer side."""
        target = self.tell() if pos is None else pos
        # BufferedWriter.truncate() flushes pending writes first.
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read through the reader side."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then readinto through the reader side."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek through the reader side."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then read1 through the reader side."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write through the writer side."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1289
1290
class TextIOBase(IOBase):

    """Base class for text I/O.

    The class offers a character and line based interface to stream
    I/O.  It has no readinto method because Python's character strings
    are immutable.  There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Data is read from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n reads until
        EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Split the underlying buffer off from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is left
        in an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading count.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None
1355
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001356io.TextIOBase.register(TextIOBase)
1357
1358
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    It wraps another incremental decoder and translates \r\n and \r
    into \n, recording which kinds of newline were encountered.  With
    translate=False it still makes sure a newline sequence is returned
    in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    # Value of the newlines property for every possible bit mask.
    _NEWLINES_BY_MASK = (
        None,
        "\n",
        "\r",
        ("\r", "\n"),
        "\r\n",
        ("\n", "\r\n"),
        ("\r", "\r\n"),
        ("\r", "\n", "\r\n"),
    )

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, holding back a trailing \\r until more data comes."""
        # Decode the input; with no wrapped decoder it is used as is.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        # Put back the \r that was held over from the previous pass.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline were read.
        crlf = text.count('\r\n')
        cr = text.count('\r') - crlf
        lf = text.count('\n') - crlf
        if lf:
            self.seennl |= self._LF
        if cr:
            self.seennl |= self._CR
        if crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if crlf:
                text = text.replace("\r\n", "\n")
            if cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); bit 0 of flag is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines, the pending \\r and the wrapped state."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far (None, a str, or a tuple)."""
        return self._NEWLINES_BY_MASK[self.seennl]
1442
1443
1444class TextIOWrapper(TextIOBase):
1445
1446 r"""Character and line based layer over a BufferedIOBase object, buffer.
1447
1448 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001449 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001450
1451 errors determines the strictness of encoding and decoding (see the
1452 codecs.register) and defaults to "strict".
1453
1454 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1455 handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
1457 or '\r\n' are translated to '\n' before being returned to the
1458 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001459 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001460 legal values, that newline becomes the newline when the file is read
1461 and it is returned untranslated. On output, '\n' is converted to the
1462 newline.
1463
1464 If line_buffering is True, a call to flush is implied when a call to
1465 write contains a newline character.
1466 """
1467
1468 _CHUNK_SIZE = 2048
1469
# write_through is accepted but has no effect: this implementation
# always writes through.  The argument exists only so that the
# signature matches the one of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Set up the text layer over buffer.

    Raises TypeError for a non-str newline and ValueError for an
    illegal newline value or a non-str encoding or errors.
    """
    if newline is not None and not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    # Encoding lookup order: explicit argument, encoding of the device
    # behind the file descriptor, then the locale's preferred encoding.
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # text returned from the decoder, buffered
    self._decoded_chars_used = 0  # read() offset into _decoded_chars
    self._snapshot = None  # data for reconstructing the decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._b2cratio = 0.0

    if self._seekable and self.writable() and self.buffer.tell() != 0:
        # The buffer is not at its start: set the encoder state to 0.
        try:
            self._get_encoder().setstate(0)
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
1528
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001529 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1530 # where dec_flags is the second (integer) item of the decoder state
1531 # and next_input is the chunk of input bytes that comes next after the
1532 # snapshot point. We use this to reconstruct decoder states in tell().
1533
1534 # Naming convention:
1535 # - "bytes_..." for integer variables that count input bytes
1536 # - "chars_..." for integer variables that count decoded characters
1537
def __repr__(self):
    """Describe the wrapper: name and mode when available, then encoding."""
    parts = ["<_pyio.TextIOWrapper"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        parts.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        parts.append(" mode={0!r}".format(mode))
    parts.append(" encoding={0!r}>".format(self.encoding))
    return "".join(parts)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001553
@property
def encoding(self):
    """The encoding stored on this wrapper (read-only)."""
    current = self._encoding
    return current
1557
@property
def errors(self):
    """The error-handling setting stored on this wrapper (read-only)."""
    current = self._errors
    return current
1561
@property
def line_buffering(self):
    """The line-buffering flag stored on this wrapper (read-only)."""
    current = self._line_buffering
    return current
1565
@property
def buffer(self):
    """The underlying buffer object stored on this wrapper (read-only)."""
    current = self._buffer
    return current
1569
def seekable(self):
    """Return the stored seekable flag; raise ValueError when closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
1574
def readable(self):
    """Forward to the underlying buffer's readable()."""
    target = self.buffer
    return target.readable()
1577
def writable(self):
    """Forward to the underlying buffer's writable()."""
    target = self.buffer
    return target.writable()
1580
def flush(self):
    """Flush the underlying buffer and reset the telling flag."""
    target = self.buffer
    target.flush()
    # After a flush, telling is allowed again whenever seeking is.
    self._telling = self._seekable
1584
def close(self):
    """Flush the stream, then close the underlying buffer.

    Nothing happens when the buffer is None or the stream is already
    closed.  The buffer is closed even when the flush raises, so a
    failing flush cannot leave the underlying buffer open; the flush
    error still propagates to the caller.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001589
@property
def closed(self):
    """The ``closed`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
1593
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
1597
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    underlying = self.buffer
    return underlying.fileno()
1600
def isatty(self):
    """Return whatever the underlying buffer's isatty() returns."""
    underlying = self.buffer
    return underlying.isatty()
1603
def write(self, s):
    """Encode the str *s* and write it to the underlying buffer.

    Returns the length of *s* as passed in (before newline translation).
    Raises ValueError if the stream is closed and TypeError if *s* is
    not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    translate = self._writetranslate
    # Only look for "\n" when translation or line buffering needs it.
    saw_newline = (translate or self._line_buffering) and "\n" in s
    if saw_newline and translate:
        newline = self._writenl
        if newline != "\n":
            s = s.replace("\n", newline)
    encoder = self._encoder
    if not encoder:
        encoder = self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    # Writing invalidates any read-side snapshot and decoder state.
    self._snapshot = None
    decoder = self._decoder
    if decoder:
        decoder.reset()
    return char_count
1625
def _get_encoder(self):
    """Create an incremental encoder, cache it on self, and return it.

    The encoder is looked up from ``self._encoding`` and constructed
    with ``self._errors``.
    """
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = self._encoder = factory(self._errors)
    return encoder
1630
def _get_decoder(self):
    """Create an incremental decoder, cache it on self, and return it.

    The decoder is looked up from ``self._encoding`` and constructed
    with ``self._errors``.  When ``self._readuniversal`` is true it is
    wrapped in an IncrementalNewlineDecoder configured with
    ``self._readtranslate``.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    result = factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
1638
1639 # The following three methods implement an ADT for _decoded_chars.
1640 # Text returned from the decoder is buffered here until the client
1641 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and reset the used-count to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
1646
def _get_decoded_chars(self, n=None):
    """Consume and return text from the _decoded_chars buffer.

    With n None, everything after the current offset is returned;
    otherwise at most n characters.  The used-count advances by the
    number of characters actually returned.
    """
    begin = self._decoded_chars_used
    end = None if n is None else begin + n
    taken = self._decoded_chars[begin:end]
    self._decoded_chars_used += len(taken)
    return taken
1656
def _rewind_decoded_chars(self, n):
    """Move the used-count of the _decoded_chars buffer back by n.

    Raises AssertionError if n exceeds the number of characters used.
    """
    used = self._decoded_chars_used
    if n > used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = used - n
1662
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the BufferedReader.

    Returns True unless EOF was reached.  The decoded string replaces
    the previous contents of self._decoded_chars.  The whole input chunk
    is handed to the decoder, though part of it may stay buffered inside
    the decoder, not yet converted.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    telling = self._telling
    if telling:
        # To prepare for tell(), snapshot a point in the file where the
        # decoder's input buffer is empty: that point lies
        # len(dec_buffer) bytes back, with state (b'', dec_flags).
        dec_buffer, dec_flags = decoder.getstate()

    # Read a chunk, decode it, and store the result in _decoded_chars.
    reader = self.buffer.read1 if self._has_read1 else self.buffer.read
    input_chunk = reader(self._CHUNK_SIZE)
    at_eof = not input_chunk
    text = decoder.decode(input_chunk, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk, used by tell().
    self._b2cratio = (
        len(input_chunk) / len(self._decoded_chars) if text else 0.0)

    if telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not at_eof
1704
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the components of a tell() cookie into one integer.

    The meaning of a cookie is: seek to position, set the decoder flags
    to dec_flags, read bytes_to_feed bytes, feed them into the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters of
    the decoded result.  Each field is placed 64 bits above the previous
    one; need_eof is stored as a single bit at offset 256.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1714
def _unpack_cookie(self, bigint):
    """Split a cookie integer into its five components.

    Returns (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip).  The first, second, third and fifth values are
    successive 64-bit fields taken from the low end; need_eof is
    whatever remains above them.
    """
    fields = []
    remainder = bigint
    for _ in range(4):
        remainder, field = divmod(remainder, 1 << 64)
        fields.append(field)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, remainder, chars_to_skip
1721
def tell(self):
    """Return an integer cookie describing the current stream position.

    The stream is flushed and the underlying buffer's tell() is taken.
    If there is no decoder or no snapshot, that raw position is
    returned.  Otherwise the position is moved back to the snapshot
    point recorded by _read_chunk() and the decoder is replayed over the
    snapshot bytes to find a start point at which the decoder has
    nothing buffered; the result is packed with _pack_cookie().  The
    decoder's state is restored before returning.

    Raises UnsupportedOperation if self._seekable is false, and IOError
    if self._telling is false or the position cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        # The estimate uses the bytes-per-char ratio saved by _read_chunk.
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                # (the step doubles on each consecutive overshoot)
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # No acceptable start point found by the fast search:
            # fall back to the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # Undo the effect of the replay on the live decoder.
        decoder.setstate(saved_state)
1820
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer and return its result.

    When pos is None the value of self.tell() is used as the position.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001826
def detach(self):
    """Flush, disconnect the underlying buffer, and return it.

    Afterwards self._buffer is None.  Raises ValueError if the buffer
    is already None.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1834
def seek(self, cookie, whence=0):
    """Reposition the stream and return the new position or cookie.

    cookie is a value previously returned by tell() (or 0).  whence
    selects the reference point:

    * 0 -- seek to the position described by cookie (must be >= 0);
    * 1 -- only cookie == 0 is supported; re-seeks to self.tell();
    * 2 -- only cookie == 0 is supported; seeks to the end of the
      underlying buffer and returns that position.

    Raises ValueError if the stream is closed, whence is unsupported or
    cookie is negative; UnsupportedOperation if the stream is not
    seekable or a nonzero relative seek is requested; IOError if the
    logical position cannot be restored.
    """
    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder (merely useful for proper BOM handling)
    try:
        encoder = self._encoder or self._get_encoder()
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
    else:
        if cookie != 0:
            encoder.setstate(0)
        else:
            encoder.reset()
    return cookie
1905
def read(self, n=None):
    """Read and return decoded text.

    With n None or negative, everything up to EOF is returned and the
    decoded-chars buffer and snapshot are cleared.  Otherwise chunks are
    read until n characters are available or EOF is reached.

    Raises TypeError if n has no __index__ attribute.
    """
    self._checkReadable()
    size = -1 if n is None else n
    decoder = self._decoder
    if not decoder:
        decoder = self._get_decoder()
    try:
        size.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err

    if size < 0:
        # Read everything: pending decoded text first, then the rest.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return pending + remainder

    # Keep reading chunks until we have `size` characters to return.
    text = self._get_decoded_chars(size)
    while len(text) < size:
        got_data = self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
        if not got_data:
            break
    return text
1930
def __next__(self):
    """Return the next line, or raise StopIteration when readline()
    returns an empty result.

    self._telling is set to False before reading; once input is
    exhausted the snapshot is cleared and self._telling is reset to
    self._seekable.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1939
def readline(self, limit=None):
    """Read and return one line of decoded text.

    limit is the maximum number of characters to return; None (or a
    negative value) means no limit.  At end of file the text collected
    so far is returned, which may be empty.

    Raises ValueError if the stream is closed and TypeError if limit is
    neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index from which the next newline search begins.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # NOTE: this search does not pass `start`, so it scans
            # `line` from the beginning on every iteration.
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data
        # (keep reading chunks until one yields decoded text or EOF).
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2027
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
2031
2032
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the initial contents of the object.
    The newline argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Wrap a fresh BytesIO (UTF-8, strict errors) and, unless
        initial_value is None, write it and seek back to the start.

        Raises TypeError if initial_value is neither None nor a str.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        self.write(str(initial_value))
        self.seek(0)

    def getvalue(self):
        """Flush, then return the buffer's contents decoded to str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported via self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")