blob: 1ce61e94b16f4d5250cc93f06ab4d58f516df27f [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
# Hardwired whence values accepted by seek(); the SEEK_HOLE / SEEK_DATA
# extensions are added only when the os module exposes them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# Fallback buffer size in bytes; open() uses st_blksize whenever we can.
DEFAULT_BUFFER_SIZE = 8 * 1024

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind the builtin under this module's namespace for compatibility.
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000032
33
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines mode works (it
    only applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.
    It works as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # Argument type checks.  The values are wrapped in a 1-tuple so that a
    # tuple argument is reported with its repr instead of breaking the
    # %-formatting itself.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))

    # Reject unknown mode characters and repeated characters.
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # 'result' always names the outermost object built so far, so that a
    # failure in any later step can close the whole stack instead of
    # leaking the freshly opened raw file.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size: keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except BaseException:
        result.close()
        raise
227
228
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose __get__ builds the text on every access: the call
    signature of open() followed by a blank line and open()'s own
    docstring.
    """
    def __get__(self, obj, typ):
        # The signature line mirrors open()'s parameter list, including
        # *opener*.  open.__doc__ may be None when docstrings are stripped,
        # in which case only the signature is returned.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            (open.__doc__ or ""))
237
class OpenWrapper:
    """Wrapper for builtins.open

    Calling the class simply forwards to open() and returns its result.
    Being a class rather than a plain function, it won't become a bound
    method when stored as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the literal docstring above with text computed on access.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        return open(*positional, **keywords)
250
251
# In normal operation the io module already provides UnsupportedOperation,
# and both names should be bound to the same object.  A local stand-in is
# defined only when io lacks the attribute.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259
260
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for an unsupported method.

        name is the method name to mention in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position (whence=1).
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        The base implementation only checks that the stream is not closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  If
        flush() raises, the exception propagates but the object is still
        marked as closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark closed even when flush() fails, so that later
                # close() calls (including the one from __del__) do not
                # retry the failing flush.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        UnsupportedOperation is raised if the IO object does not use a
        file descriptor; this base implementation always raises it.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None is treated like -1 (no limit) and
        any other non-int raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, look ahead to read up to and including
            # the next newline (or everything peeked) in one read() call.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # Without peek(), fall back to reading one byte at a time.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as the line iterator; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines using write().

        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
527
528
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a derived class only has to
    # supply readinto() as its primitive to get a working read().  In
    # general, readinto() can be more efficient than read().

    # (The reverse -- a default readinto() written in terms of read() --
    # is deliberately not provided: a subclass implementing neither would
    # end up in nasty mutual recursion.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        A value of None or a negative n reads until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None or n < 0:
            return self.readall()
        scratch = bytearray(n.__index__())
        count = self.readinto(scratch)
        if count is None:
            # readinto() reported "no data available right now".
            return None
        # Drop the unused tail of the scratch buffer.
        del scratch[count:]
        return bytes(scratch)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        if collected:
            return bytes(collected)
        # Nothing was read at all: pass through the last read() result,
        # which is b'' or None.
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
592
593
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        payload = self.read(len(b))
        count = len(payload)
        try:
            b[:count] = payload
        except TypeError:
            # Slice assignment of bytes failed; retry through an array of
            # signed bytes, but only when the target is an array.array.
            import array
            if not isinstance(b, array.array):
                raise
            b[:count] = array.array('b', payload)
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
681
682
683class _BufferedIOMixin(BufferedIOBase):
684
685 """A mixin implementation of BufferedIOBase with an underlying raw stream.
686
687 This passes most requests on to the underlying raw stream. It
688 does *not* provide implementations of read(), readinto() or
689 write().
690 """
691
692 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000693 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694
695 ### Positioning ###
696
697 def seek(self, pos, whence=0):
698 new_position = self.raw.seek(pos, whence)
699 if new_position < 0:
700 raise IOError("seek() returned an invalid position")
701 return new_position
702
703 def tell(self):
704 pos = self.raw.tell()
705 if pos < 0:
706 raise IOError("tell() returned an invalid position")
707 return pos
708
709 def truncate(self, pos=None):
710 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
711 # and a flush may be necessary to synch both views of the current
712 # file state.
713 self.flush()
714
715 if pos is None:
716 pos = self.tell()
717 # XXX: Should seek() be used, instead of passing the position
718 # XXX directly to truncate?
719 return self.raw.truncate(pos)
720
721 ### Flush and close ###
722
723 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000724 if self.closed:
725 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726 self.raw.flush()
727
728 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000729 if self.raw is not None and not self.closed:
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +0100730 try:
731 # may raise BlockingIOError or BrokenPipeError etc
732 self.flush()
733 finally:
734 self.raw.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000736 def detach(self):
737 if self.raw is None:
738 raise ValueError("raw stream already detached")
739 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000740 raw = self._raw
741 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000742 return raw
743
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744 ### Inquiries ###
745
746 def seekable(self):
747 return self.raw.seekable()
748
749 def readable(self):
750 return self.raw.readable()
751
752 def writable(self):
753 return self.raw.writable()
754
755 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000756 def raw(self):
757 return self._raw
758
759 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760 def closed(self):
761 return self.raw.closed
762
763 @property
764 def name(self):
765 return self.raw.name
766
767 @property
768 def mode(self):
769 return self.raw.mode
770
Antoine Pitrou243757e2010-11-05 21:15:39 +0000771 def __getstate__(self):
772 raise TypeError("can not serialize a '{0}' object"
773 .format(self.__class__.__name__))
774
Antoine Pitrou716c4442009-05-23 19:04:03 +0000775 def __repr__(self):
776 clsname = self.__class__.__name__
777 try:
778 name = self.name
779 except AttributeError:
780 return "<_pyio.{0}>".format(clsname)
781 else:
782 return "<_pyio.{0} name={1!r}>".format(clsname, name)
783
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784 ### Lower-level APIs ###
785
786 def fileno(self):
787 return self.raw.fileno()
788
789 def isatty(self):
790 return self.raw.isatty()
791
792
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The contents are copied into a private bytearray and the
        position starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict for pickling.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is None or negative, everything up to the end of the
        buffer is returned.  Returns b"" when the position is at or
        past the end.  Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n=-1):
        """This is the same as read; n defaults to reading everything."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the bytes written.

        If the position lies past the end of the buffer, the gap is
        filled with null bytes first.  Raises ValueError if the stream
        is closed and TypeError if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        try:
            view = memoryview(b)
        except TypeError:
            # Not a buffer object (e.g. a list of ints): keep counting
            # items, each of which becomes one byte on assignment.
            n = len(b)
        else:
            # len() counts items, not bytes; for buffers with an item
            # size above 1 (e.g. array('H')) only nbytes is correct.
            with view:
                n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is absolute (negative pos raises ValueError); 1 is
        relative to the current position and 2 to the end of the
        buffer, both clamped at 0.  Any other whence raises ValueError.
        Raises TypeError if pos has no __index__, ValueError if closed.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Normalise to a real int so index-like objects work in the
        # comparisons and arithmetic below.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onward.

        Returns the truncation position.  The stream position itself is
        left untouched.  Raises TypeError if pos has no __index__ and
        ValueError if pos is negative or the stream is closed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
906
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except InterruptedError:
                    continue
                if chunk in empty_values:
                    # Remember whether the raw stream said b"" or None
                    # so it can be passed on if nothing was buffered.
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except InterruptedError:
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            # Retry only when the raw read is interrupted.
            while True:
                try:
                    current = self.raw.read(to_read)
                except InterruptedError:
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and drop the read buffer.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead by the unread buffered bytes;
                # compensate so the move is relative to the logical
                # position.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1057
class BufferedWriter(_BufferedIOMixin):

    """Write buffer in front of a writable, sequential raw stream.

    The constructor takes the writable raw stream and an optional
    buffer_size, which defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer, flushing when it overflows.

        Returns the number of bytes accepted.  Raises ValueError on a
        closed file, TypeError for str input, and BlockingIOError
        (carrying the number of bytes accepted) when the buffer still
        exceeds buffer_size after a flush attempt that would block.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                if len(self._write_buf) > self.buffer_size:
                    # Still over buffer_size: accept only part of the
                    # write and trim the buffer back to the limit.
                    excess = len(self._write_buf) - self.buffer_size
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw tell())."""
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write out all buffered data while holding the write lock."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Drain the write buffer; the caller must hold _write_lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                sent = self.raw.write(self._write_buf)
            except InterruptedError:
                # Interrupted before anything was reported: try again.
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if sent is None:
                # The raw stream could not take any data right now.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if sent > len(self._write_buf) or sent < 0:
                raise IOError("write() returned incorrect number of bytes")
            # Drop what was written and loop for the remainder.
            del self._write_buf[:sent]

    def tell(self):
        """Return the raw position plus the bytes still buffered."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1146
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  Raises IOError if
        reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close both halves of the pair.

        The reader is closed even when closing the writer raises, so a
        failing writer cannot leak the reader; the writer's exception
        still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1214
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    The constructor takes the seekable raw stream as first argument
    and an optional buffer_size, which defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        # Both halves are initialised explicitly on the same raw stream.
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo read-ahead, then seek the raw stream.

        Raises ValueError for a whence value outside valid_seek_flags
        and IOError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead: move the raw stream back over the bytes
            # that were buffered but not consumed.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # The raw seek comes first and the read buffer is emptied
        # afterwards, so a failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Report the position from the writer side if data is pending
        in the write buffer, otherwise from the reader side."""
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: tell()) via BufferedWriter.truncate."""
        target = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read; None is passed on as -1."""
        size = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write through BufferedWriter.write."""
        if self._read_buf:
            # Undo readahead so the write lands at the logical position.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1284
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line based interface to stream I/O.  There
    is no readinto method, because Python's character strings are
    immutable, and there is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Implementations read from the underlying buffer until n
        characters are available or EOF is hit; a negative or omitted
        n means read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached, the TextIO is left in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override; the base value is None."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are considered.

        Subclasses should override; the base value is None.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override; the base value is None."""
        return None
1349
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001350io.TextIOBase.register(TextIOBase)
1351
1352
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Decoder wrapper used for reading in universal newlines mode.

    It wraps another incremental decoder (or None, in which case the
    input is used as already-decoded text), translating \r\n and \r
    into \n when translate is true, and recording which kinds of
    newline were seen.  With translate=False it still guarantees that a
    \r\n sequence is returned in one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bit mask of _LF / _CR / _CRLF
        self.pendingcr = False   # a trailing \r is being held back

    def decode(self, input, final=False):
        """Decode input and return the text with newline handling applied."""
        # Decode the input, then re-attach the \r held back by the
        # previous call, if any.
        wrapped = self.decoder
        text = input if wrapped is None else wrapped.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r bit
        and the wrapped decoder's flags occupy the bits above it."""
        if self.decoder is None:
            pending_bytes, flags = b"", 0
        else:
            pending_bytes, flags = self.decoder.getstate()
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending_bytes, flags

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        pending_bytes, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending_bytes, flags >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        # Indexed by the seennl bit mask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1436
1437
1438class TextIOWrapper(TextIOBase):
1439
1440 r"""Character and line based layer over a BufferedIOBase object, buffer.
1441
1442 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001443 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001444
1445 errors determines the strictness of encoding and decoding (see the
1446 codecs.register) and defaults to "strict".
1447
1448 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1449 handling of line endings. If it is None, universal newlines is
1450 enabled. With this enabled, on input, the lines endings '\n', '\r',
1451 or '\r\n' are translated to '\n' before being returned to the
1452 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001453 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001454 legal values, that newline becomes the newline when the file is read
1455 and it is returned untranslated. On output, '\n' is converted to the
1456 newline.
1457
1458 If line_buffering is True, a call to flush is implied when a call to
1459 write contains a newline character.
1460 """
1461
1462 _CHUNK_SIZE = 2048
1463
1464 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001465 line_buffering=False, write_through=False):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001466 if newline is not None and not isinstance(newline, str):
1467 raise TypeError("illegal newline type: %r" % (type(newline),))
1468 if newline not in (None, "", "\n", "\r", "\r\n"):
1469 raise ValueError("illegal newline value: %r" % (newline,))
1470 if encoding is None:
1471 try:
1472 encoding = os.device_encoding(buffer.fileno())
1473 except (AttributeError, UnsupportedOperation):
1474 pass
1475 if encoding is None:
1476 try:
1477 import locale
1478 except ImportError:
1479 # Importing locale may fail if Python is being built
1480 encoding = "ascii"
1481 else:
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001482 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001483
1484 if not isinstance(encoding, str):
1485 raise ValueError("invalid encoding: %r" % encoding)
1486
1487 if errors is None:
1488 errors = "strict"
1489 else:
1490 if not isinstance(errors, str):
1491 raise ValueError("invalid errors: %r" % errors)
1492
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001493 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001494 self._line_buffering = line_buffering
1495 self._encoding = encoding
1496 self._errors = errors
1497 self._readuniversal = not newline
1498 self._readtranslate = newline is None
1499 self._readnl = newline
1500 self._writetranslate = newline != ''
1501 self._writenl = newline or os.linesep
1502 self._encoder = None
1503 self._decoder = None
1504 self._decoded_chars = '' # buffer for text returned from decoder
1505 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1506 self._snapshot = None # info for reconstructing decoder state
1507 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02001508 self._has_read1 = hasattr(self.buffer, 'read1')
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001509 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001510
Antoine Pitroue4501852009-05-14 18:55:55 +00001511 if self._seekable and self.writable():
1512 position = self.buffer.tell()
1513 if position != 0:
1514 try:
1515 self._get_encoder().setstate(0)
1516 except LookupError:
1517 # Sometimes the encoder doesn't exist
1518 pass
1519
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001520 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1521 # where dec_flags is the second (integer) item of the decoder state
1522 # and next_input is the chunk of input bytes that comes next after the
1523 # snapshot point. We use this to reconstruct decoder states in tell().
1524
1525 # Naming convention:
1526 # - "bytes_..." for integer variables that count input bytes
1527 # - "chars_..." for integer variables that count decoded characters
1528
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001529 def __repr__(self):
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001530 result = "<_pyio.TextIOWrapper"
Antoine Pitrou716c4442009-05-23 19:04:03 +00001531 try:
1532 name = self.name
1533 except AttributeError:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001534 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001535 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001536 result += " name={0!r}".format(name)
1537 try:
1538 mode = self.mode
1539 except AttributeError:
1540 pass
1541 else:
1542 result += " mode={0!r}".format(mode)
1543 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001544
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001545 @property
1546 def encoding(self):
1547 return self._encoding
1548
1549 @property
1550 def errors(self):
1551 return self._errors
1552
1553 @property
1554 def line_buffering(self):
1555 return self._line_buffering
1556
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001557 @property
1558 def buffer(self):
1559 return self._buffer
1560
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 def seekable(self):
1562 return self._seekable
1563
1564 def readable(self):
1565 return self.buffer.readable()
1566
1567 def writable(self):
1568 return self.buffer.writable()
1569
1570 def flush(self):
1571 self.buffer.flush()
1572 self._telling = self._seekable
1573
1574 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00001575 if self.buffer is not None and not self.closed:
1576 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001577 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001578
1579 @property
1580 def closed(self):
1581 return self.buffer.closed
1582
1583 @property
1584 def name(self):
1585 return self.buffer.name
1586
1587 def fileno(self):
1588 return self.buffer.fileno()
1589
1590 def isatty(self):
1591 return self.buffer.isatty()
1592
def write(self, s):
    """Encode the str *s* and write it to the underlying buffer.

    Returns the length of *s* as passed in (before newline translation).
    Raises ValueError if the stream is closed and TypeError if *s* is
    not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    original_length = len(s)
    # Only look for "\n" when translation or line buffering cares about it.
    saw_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    # Writing invalidates any tell() snapshot and pending decoder state.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return original_length
1614
def _get_encoder(self):
    """Create an incremental encoder, store it in ``_encoder``, return it.

    The encoder is looked up by ``self._encoding`` and constructed with
    ``self._errors``.
    """
    encoder = codecs.getincrementalencoder(self._encoding)(self._errors)
    self._encoder = encoder
    return encoder
1619
def _get_decoder(self):
    """Create an incremental decoder, store it in ``_decoder``, return it.

    The decoder is looked up by ``self._encoding`` and constructed with
    ``self._errors``.  When ``self._readuniversal`` is set, it is wrapped
    in an IncrementalNewlineDecoder configured with ``self._readtranslate``.
    """
    plain = codecs.getincrementaldecoder(self._encoding)(self._errors)
    if not self._readuniversal:
        result = plain
    else:
        result = IncrementalNewlineDecoder(plain, self._readtranslate)
    self._decoder = result
    return result
1627
1628 # The following three methods implement an ADT for _decoded_chars.
1629 # Text returned from the decoder is buffered here until the client
1630 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the decoded-text buffer with *chars* and zero the used count."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
1635
def _get_decoded_chars(self, n=None):
    """Consume and return text from the decoded-text buffer.

    With n None, everything from the current offset onwards is returned;
    otherwise the slice ``[offset:offset + n]``.  The used-count advances
    by the number of characters actually returned.
    """
    begin = self._decoded_chars_used
    end = None if n is None else begin + n
    taken = self._decoded_chars[begin:end]
    self._decoded_chars_used = begin + len(taken)
    return taken
1645
def _rewind_decoded_chars(self, n):
    """Move the used-count of the decoded-text buffer back by *n*.

    Raises AssertionError when *n* exceeds the number of characters
    consumed so far.
    """
    consumed = self._decoded_chars_used
    if consumed < n:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = consumed - n
1651
def _read_chunk(self):
    """Read one chunk from the underlying buffer and decode it.

    The decoded text replaces ``self._decoded_chars``.  The whole input
    chunk is handed to the decoder, which may keep some of it buffered.
    Returns True unless the read produced no data (EOF).  Raises
    ValueError if no decoder has been created yet.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # For tell(): a valid snapshot point (decoder input buffer empty)
        # existed len(pending) bytes ago with state (b'', flags).
        pending, flags = decoder.getstate()

    # Prefer read1() when the buffer offers it.
    if self._has_read1:
        reader = self.buffer.read1
    else:
        reader = self.buffer.read
    raw = reader(self._CHUNK_SIZE)
    at_eof = not raw
    text = decoder.decode(raw, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk; used by tell() as a hint.
    self._b2cratio = len(raw) / len(self._decoded_chars) if text else 0.0

    if self._telling:
        # At the snapshot point, the next input to decode is pending + raw.
        self._snapshot = (flags, pending + raw)

    return not at_eof
1693
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the components of a tell() cookie into a single integer.

    A cookie means: seek to position, set the decoder flags to
    dec_flags, read bytes_to_feed bytes and feed them to the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters of
    the decoded result.  Fields sit at bit offsets 0, 64, 128, 192
    (chars_to_skip) and 256 (need_eof, as a single bit).
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1703
def _unpack_cookie(self, bigint):
    """Split a tell() cookie into its components.

    Returns ``(position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip)``; the first three and chars_to_skip are successive
    64-bit fields, need_eof is whatever remains above them.
    """
    fields = []
    remainder = bigint
    for _ in range(4):
        remainder, field = divmod(remainder, 1 << 64)
        fields.append(field)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, remainder, chars_to_skip
1710
def tell(self):
    """Return a cookie describing the current logical text position.

    When no decoder or snapshot exists, the cookie is simply the
    underlying buffer's byte position.  Otherwise the decoder is
    replayed over the snapshot's input to find a safe start point (one
    where the decoder has nothing buffered) and the result is packed
    with _pack_cookie().  The decoder's state is restored before
    returning.

    Raises UnsupportedOperation if the stream is not seekable, and
    IOError if telling is disabled (``_telling`` is false) or the
    position cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ran out without finding a start point: fall back to
            # the snapshot position itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # Replaying mutated the decoder; put it back as we found it.
        decoder.setstate(saved_state)
1809
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at *pos*.

    When *pos* is None the position reported by tell() is used.
    Returns whatever the underlying buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815
def detach(self):
    """Flush, disconnect and return the underlying buffer.

    Afterwards ``self._buffer`` is None.  Raises ValueError if the
    buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1823
def seek(self, cookie, whence=0):
    """Move to the position described by *cookie* and return the new position.

    whence 0: *cookie* is a value previously returned by tell(); the
    stream is repositioned at the cookie's safe start point and the
    decoder is replayed up to the logical position.  Returns *cookie*.
    whence 1: only ``cookie == 0`` is supported; it re-syncs to the
    position reported by tell().
    whence 2: only ``cookie == 0`` is supported; seeks to the end of the
    underlying buffer and returns that byte position.

    Raises ValueError on a closed file, an unsupported whence or a
    negative cookie; UnsupportedOperation for a non-seekable stream or a
    nonzero relative seek; IOError if the logical position cannot be
    restored.
    """
    if self.closed:
        # The message used to say "tell", a copy-paste slip.
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder (merely useful for proper BOM handling)
    try:
        encoder = self._encoder or self._get_encoder()
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
    else:
        if cookie != 0:
            encoder.setstate(0)
        else:
            encoder.reset()
    return cookie
1894
def read(self, n=None):
    """Read and return at most *n* characters as a str.

    With n None or negative, everything up to EOF is read and decoded.
    *n* may be any object implementing ``__index__``; it is converted to
    an int before use.  Raises TypeError ("an integer is required") when
    *n* has no ``__index__``.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        to_index = n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err
    # Convert index-like objects so the comparisons below work on an int.
    n = to_index()
    if n < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have n characters to return.
        eof = False
        result = self._get_decoded_chars(n)
        while len(result) < n and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(n - len(result))
        return result
1919
def __next__(self):
    """Return the next line, raising StopIteration at end of input.

    Telling is switched off (``_telling = False``) before reading; when
    readline() returns an empty result the snapshot is cleared and
    ``_telling`` is reset to ``_seekable``.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    # Exhausted: re-enable tell() where the stream allows it.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1928
def readline(self, limit=None):
    """Read and return one line, or at most *limit* characters of it.

    The line terminator that is searched for depends on
    ``_readtranslate`` / ``_readuniversal`` / ``_readnl``.  If end of
    file is reached before a terminator is found, whatever text was
    collected is returned.  A *limit* of None means no limit.

    Raises ValueError if the file is closed and TypeError if *limit* is
    neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Offset in `line` from which the next newline search resumes.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal: search the whole line each time for the
            # configured terminator (which may be longer than one char).
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data.
        # Keep reading chunks until one yields decoded text or EOF is hit.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2016
@property
def newlines(self):
    """Return the decoder's ``newlines`` attribute, or None without a decoder."""
    if self._decoder:
        return self._decoder.newlines
    return None
2020
2021
class StringIO(TextIOWrapper):
    """In-memory text stream built on a TextIOWrapper around a BytesIO.

    initial_value, when not None, must be a str and becomes the initial
    contents of the stream.  newline has the same meaning as the
    newline argument of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Store the initial text, then rewind to the start of the stream.
        self.write(str(initial_value))
        self.seek(0)

    def getvalue(self):
        """Flush and return the buffer's entire contents decoded to str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Unsupported: delegates to self._unsupported("detach")."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")