blob: a5d61353109f9f2e10434a5dabfdb29849f5cad5 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18# open() uses st_blksize whenever we can
19DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
20
21# NOTE: Base classes defined here are registered with the "official" ABCs
22# defined in io.py. We don't use real inheritance though, because we don't
23# want to inherit the C implementations.
24
25
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno/strerror pair, the instance stores
    characters_written, which must be an int (it defaults to 0).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the error.

        errno and strerror are forwarded unchanged to IOError.
        Raises TypeError if characters_written is not an int.
        """
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
35
36
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It
    works as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor
    will be kept open when the file is closed. This does not work when a file
    name is given and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    inconsistent mode/buffering/encoding combinations.  If an error occurs
    after the raw file has been opened, the raw file is closed again before
    the exception propagates.
    """
    # The 1-tuples keep %-formatting correct even if the bad value is
    # itself a tuple.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))
    modes = set(mode)
    # Reject unknown flags and repeated flags (e.g. "rr").
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)
    # 'result' always names the outermost object built so far, so that a
    # failure below can close everything that has been opened.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline,
                             line_buffering)
        result = text
        text.mode = mode
        return result
    except:
        # Don't leak the open file when a later step fails; the original
        # exception is always re-raised.
        result.close()
        raise
220
221
class DocDescriptor:
    """Descriptor supplying the text of builtins.open.__doc__.

    Reading the attribute yields the open() signature line, a blank line,
    and then the docstring of open().
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
230
class OpenWrapper:
    """Stand-in for builtins.open that is safe to store on a class.

    A plain function stored as a class variable turns into a bound method
    when looked up on an instance (dbm.dumb stores open that way).  A class
    does not, and "instantiating" this one simply calls open().

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the docstring above at runtime with the text of open.__doc__.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # Never creates an OpenWrapper instance; returns whatever open() does.
        return open(*positional, **keywords)
243
244
# Reuse io.UnsupportedOperation when the io module provides it, so that both
# modules normally share one exception class; define a local equivalent only
# when it is missing.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252
253
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an IOError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-byte seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  The
        object is marked closed even when flush() raises; the exception
        still propagates to the caller.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise
        UnsupportedOperation.  This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None is treated like -1 (no limit).
        Raises TypeError for any other non-int limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use peek() to size the next read so that it stops right
                # after the first newline (or at the limit).
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() we must read one byte at a time so as not
                # to consume data past the newline.
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol.  Returns self; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty line."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.  Raises ValueError if the stream is
        closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
517
518io.IOBase.register(IOBase)
519
520
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        If n is None or negative, defer to readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        # Drop the unused tail of the preallocated buffer.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns the bytes read so far when a read() comes back empty.  If
        nothing at all was read, returns whatever that read() returned:
        b"" at EOF, or None when no data was available.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # Keep the distinction between EOF (b"") and "no data" (None).
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
576
577io.RawIOBase.register(RawIOBase)
578from _io import FileIO
579RawIOBase.register(FileIO)
580
581
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument and has no default implementation in terms of readinto().

    read(), readinto() and write() may also raise BlockingIOError when the
    underlying raw stream is in non-blocking mode and not ready; unlike
    their raw counterparts, they never return None.

    A typical implementation should wrap a RawIOBase implementation rather
    than inherit from one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        With n omitted, None, or negative, read and return all data until
        EOF.

        With a positive n and a raw stream that is not 'interactive',
        several raw reads may be issued to satisfy the byte count (unless
        EOF comes first).  For interactive raw streams (XXX and for
        pipes?), at most one raw read is issued, and a short result does
        not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes, where n is an int, using at most one
        read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        As with read(), several reads may be issued to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as exc:
            # array.array rejects bytes in slice assignment; retry with the
            # data wrapped in an array.  Anything else is a real error.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise exc
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
667
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000668io.BufferedIOBase.register(BufferedIOBase)
669
670
671class _BufferedIOMixin(BufferedIOBase):
672
673 """A mixin implementation of BufferedIOBase with an underlying raw stream.
674
675 This passes most requests on to the underlying raw stream. It
676 does *not* provide implementations of read(), readinto() or
677 write().
678 """
679
680 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000681 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682
683 ### Positioning ###
684
685 def seek(self, pos, whence=0):
686 new_position = self.raw.seek(pos, whence)
687 if new_position < 0:
688 raise IOError("seek() returned an invalid position")
689 return new_position
690
691 def tell(self):
692 pos = self.raw.tell()
693 if pos < 0:
694 raise IOError("tell() returned an invalid position")
695 return pos
696
697 def truncate(self, pos=None):
698 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
699 # and a flush may be necessary to synch both views of the current
700 # file state.
701 self.flush()
702
703 if pos is None:
704 pos = self.tell()
705 # XXX: Should seek() be used, instead of passing the position
706 # XXX directly to truncate?
707 return self.raw.truncate(pos)
708
709 ### Flush and close ###
710
711 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000712 if self.closed:
713 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714 self.raw.flush()
715
716 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000717 if self.raw is not None and not self.closed:
718 self.flush()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719 self.raw.close()
720
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000721 def detach(self):
722 if self.raw is None:
723 raise ValueError("raw stream already detached")
724 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000725 raw = self._raw
726 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000727 return raw
728
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729 ### Inquiries ###
730
731 def seekable(self):
732 return self.raw.seekable()
733
734 def readable(self):
735 return self.raw.readable()
736
737 def writable(self):
738 return self.raw.writable()
739
740 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000741 def raw(self):
742 return self._raw
743
744 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000745 def closed(self):
746 return self.raw.closed
747
748 @property
749 def name(self):
750 return self.raw.name
751
752 @property
753 def mode(self):
754 return self.raw.mode
755
Antoine Pitrou243757e2010-11-05 21:15:39 +0000756 def __getstate__(self):
757 raise TypeError("can not serialize a '{0}' object"
758 .format(self.__class__.__name__))
759
Antoine Pitrou716c4442009-05-23 19:04:03 +0000760 def __repr__(self):
761 clsname = self.__class__.__name__
762 try:
763 name = self.name
764 except AttributeError:
765 return "<_pyio.{0}>".format(clsname)
766 else:
767 return "<_pyio.{0} name={1!r}>".format(clsname, name)
768
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769 ### Lower-level APIs ###
770
771 def fileno(self):
772 return self.raw.fileno()
773
774 def isatty(self):
775 return self.raw.isatty()
776
777
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The stream position starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, consistent with the
        other accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is None or negative, everything up to the end of the buffer
        is returned.  Returns b"" when the position is at or past the end.
        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the count written.

        Writing past the current end first pads the gap with null bytes.
        Raises ValueError if the stream is closed and TypeError if b is
        a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence 0 is absolute (pos must not be negative), 1 is relative to
        the current position and 2 is relative to the end of the buffer;
        relative results are clamped at 0.  Raises ValueError on a closed
        stream, a negative absolute position or an unknown whence, and
        TypeError if pos does not provide __index__.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        else:
            # Convert to a real int so that the comparisons below and the
            # stored position never hold an arbitrary index-capable object.
            pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream position.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position); return pos.

        The stream position itself is not changed.  Raises ValueError on a
        closed stream or a negative pos, and TypeError if pos does not
        provide __index__.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            else:
                # Same normalisation as in seek().
                pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
890
891
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    Read-ahead buffer layered over a readable, sequential raw stream.

    Data is fetched from the raw stream in chunks and served from an
    in-memory buffer.  When buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the readable raw stream raw.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Exactly n bytes are returned unless the raw stream hits EOF or
        the call would block in non-blocking mode.  With n of None or -1,
        read until EOF or until the raw read() would block.  Values
        below -1 raise ValueError.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller holds the read lock."""
        fallback = b""              # returned when nothing at all was read
        no_data = (b"", None)       # raw results meaning EOF / would block
        data, start = self._read_buf, self._read_pos

        # Unbounded read: drain the buffer, then the raw stream.
        if n is None or n == -1:
            self._reset_read_buf()
            pieces = [data[start:]]  # Only the not-yet-consumed bytes.
            while True:
                # Keep going until EOF or until read() would block.
                piece = self.raw.read()
                if piece in no_data:
                    fallback = piece
                    break
                pieces.append(piece)
            return b"".join(pieces) or fallback

        # Bounded read: hand back at most n bytes.
        buffered = len(data) - start
        if n <= buffered:
            # Fast path: the request is served entirely from the buffer.
            self._read_pos += n
            return data[start:start + n]
        # Slow path: pull from the raw stream until enough bytes arrived,
        # EOF was reached, or read() would block.
        pieces = [data[start:]]
        request = max(self.buffer_size, n)
        while buffered < n:
            piece = self.raw.read(request)
            if piece in no_data:
                fallback = piece
                break
            buffered += len(piece)
            pieces.append(piece)
        # n exceeds what is available only after EOF or a would-block.
        n = min(n, buffered)
        joined = b"".join(pieces)
        self._read_buf = joined[n:]  # Keep the surplus for later reads.
        self._read_pos = 0
        return joined[:n] if joined else fallback

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        n is the desired minimum number of bytes; at most one raw read is
        made to try to satisfy it.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller holds the read lock."""
        wanted = min(n, self.buffer_size)
        available = len(self._read_buf) - self._read_pos
        if available < wanted or available <= 0:
            fresh = self.raw.read(self.buffer_size - available)
            if fresh:
                self._read_buf = self._read_buf[self._read_pos:] + fresh
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # If any byte is already buffered, only buffered bytes are
        # returned; otherwise a single raw read is performed.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            available = len(self._read_buf) - self._read_pos
            return self._read_unlocked(min(n, available))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and drop the read buffer; return the position.

        For a relative seek (whence 1) the offset is first corrected for
        the bytes that are buffered but not yet consumed.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1025
class BufferedWriter(_BufferedIOMixin):

    """Write buffer layered over a writable, sequential raw stream.

    Written bytes are collected in memory and handed to the raw stream
    once more than buffer_size bytes are pending.  When buffer_size is
    omitted, DEFAULT_BUFFER_SIZE is used.
    """

    # stacklevel passed to warnings.warn() in __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream raw.

        Raises IOError if raw is not writable and ValueError if
        buffer_size is not positive.  Passing max_buffer_size emits a
        DeprecationWarning; the value is otherwise unused.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError on a closed stream and TypeError for str input.
        A BlockingIOError from the raw stream is re-raised carrying the
        number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: try to drain before accepting more.
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    # Nothing from this call can be accepted.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(exc.errno, exc.strerror, 0)
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still over buffer_size: accept only part of b and
                    # trim the buffer back to the limit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        Returns whatever the raw truncate() returns.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all pending bytes to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller holds the write lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        total = 0
        try:
            while self._write_buf:
                count = self.raw.write(self._write_buf)
                if count > len(self._write_buf) or count < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:count]
                total += count
        except BlockingIOError as exc:
            # Drop what the raw stream did take, then report the total.
            count = exc.characters_written
            del self._write_buf[:count]
            total += count
            raise BlockingIOError(exc.errno, exc.strerror, total)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending bytes, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1121
1122
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  Raises IOError if reader
        is not readable or writer is not writable.  Passing
        max_buffer_size emits a DeprecationWarning; the value is
        otherwise unused.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1 (read all)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate readinto() to the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate write() to the writer side."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate peek() to the reader side."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate read1() to the reader side."""
        return self.reader.read1(n)

    def readable(self):
        """Return the reader side's readable()."""
        return self.reader.readable()

    def writable(self):
        """Return the writer side's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both sides.

        The reader is closed even when closing the writer raises, so a
        failure on the writer side cannot leak the reader; the writer's
        exception still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side is closed."""
        return self.writer.closed
1193
1194
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    Combines the BufferedReader and BufferedWriter behaviour over a
    single seekable raw stream, given as the first argument.  When
    buffer_size is omitted, DEFAULT_BUFFER_SIZE is used.
    """

    # stacklevel used by BufferedWriter.__init__ for its warning.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Check raw via its _checkSeekable(), then set up both halves."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, seek the raw stream.

        Returns the new position; raises IOError if the raw seek reports
        a negative position.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Move the raw position back over the unread read-ahead.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, honouring pending writes if any."""
        if not self._write_buf:
            return BufferedReader.tell(self)
        return BufferedWriter.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read; None means read everything."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then delegate to BufferedWriter.write."""
        if self._read_buf:
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1266
1267
class TextIOBase(IOBase):

    """Base class for text I/O.

    Defines the character- and line-oriented stream interface.  No
    readinto method is provided because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n means read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override; this base returns None."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override; this base returns None.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override; this base returns None."""
        return None

io.TextIOBase.register(TextIOBase)
1334
1335
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder (or none at all), optionally
    translating \r\n and \r into \n, and records which kinds of newline
    were encountered.  With translate=False it still guarantees that a
    \r\n sequence is returned in one piece.
    """

    # Bit flags accumulated in ``seennl``.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap decoder (may be None); translate selects \\n conversion."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input and apply the newline handling; return the text."""
        # Decode the input, re-attaching a \r held back by the last call.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() always receives \r\n in a single pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1419
1420
1421class TextIOWrapper(TextIOBase):
1422
1423 r"""Character and line based layer over a BufferedIOBase object, buffer.
1424
1425 encoding gives the name of the encoding that the stream will be
1426 decoded or encoded with. It defaults to locale.getpreferredencoding.
1427
1428 errors determines the strictness of encoding and decoding (see the
1429 codecs.register) and defaults to "strict".
1430
1431 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1432 handling of line endings. If it is None, universal newlines is
1433 enabled. With this enabled, on input, the lines endings '\n', '\r',
1434 or '\r\n' are translated to '\n' before being returned to the
1435 caller. Conversely, on output, '\n' is translated to the system
1436 default line seperator, os.linesep. If newline is any other of its
1437 legal values, that newline becomes the newline when the file is read
1438 and it is returned untranslated. On output, '\n' is converted to the
1439 newline.
1440
1441 If line_buffering is True, a call to flush is implied when a call to
1442 write contains a newline character.
1443 """
1444
1445 _CHUNK_SIZE = 2048
1446
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False):
    """Validate the arguments and set up the wrapper state.

    Raises TypeError for a non-str newline and ValueError for an
    unsupported newline value, a non-str encoding or a non-str errors.
    When encoding is None it is taken from os.device_encoding() of the
    buffer's file descriptor if available, else from
    locale.getpreferredencoding(), else "ascii" if locale cannot be
    imported.
    """
    if newline is not None and not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()
    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    can_seek = self.buffer.seekable()
    self._seekable = can_seek
    self._telling = can_seek

    if self._seekable and self.writable():
        if self.buffer.tell() != 0:
            # Not at the start of the stream: put the encoder in state 0.
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
1500
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1502 # where dec_flags is the second (integer) item of the decoder state
1503 # and next_input is the chunk of input bytes that comes next after the
1504 # snapshot point. We use this to reconstruct decoder states in tell().
1505
1506 # Naming convention:
1507 # - "bytes_..." for integer variables that count input bytes
1508 # - "chars_..." for integer variables that count decoded characters
1509
def __repr__(self):
    """Return ``<_pyio.TextIOWrapper [name=...] [mode=...] encoding=...>``.

    The name and mode parts are left out when the attribute is missing.
    """
    parts = ["<_pyio.TextIOWrapper"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        parts.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        parts.append(" mode={0!r}".format(mode))
    parts.append(" encoding={0!r}>".format(self.encoding))
    return "".join(parts)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001525
@property
def encoding(self):
    """The encoding name stored by the constructor."""
    return self._encoding

@property
def errors(self):
    """The error-handling setting stored by the constructor."""
    return self._errors

@property
def line_buffering(self):
    """The line_buffering flag stored by the constructor."""
    return self._line_buffering

@property
def buffer(self):
    """The underlying buffer object."""
    return self._buffer

def seekable(self):
    """Return the seekable flag recorded at construction time."""
    return self._seekable

def readable(self):
    """Return the buffer's readable()."""
    return self.buffer.readable()

def writable(self):
    """Return the buffer's writable()."""
    return self.buffer.writable()

def flush(self):
    """Flush the buffer and set the telling flag back to the seekable flag."""
    self.buffer.flush()
    self._telling = self._seekable
1554
def close(self):
    """Flush pending data and close the underlying buffer.

    Does nothing when the buffer is None or already closed.  The buffer
    is closed even when the flush raises, so a failing flush cannot leak
    the underlying stream; the flush error still propagates.
    """
    if self.buffer is not None and not self.closed:
        try:
            # flush() may raise; the buffer must be closed regardless.
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559
@property
def closed(self):
    """Whether the underlying buffer reports itself as closed."""
    return self.buffer.closed

@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    return self.buffer.name

def fileno(self):
    """Return the buffer's fileno()."""
    return self.buffer.fileno()

def isatty(self):
    """Return the buffer's isatty()."""
    return self.buffer.isatty()
1573
def write(self, s):
    """Write data, where s is a str.

    Returns the length of s as passed in (before newline translation).
    Raises ValueError on a closed stream and TypeError if s is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    has_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if has_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (has_newline or "\r" in s):
        self.flush()
    # Writing invalidates the read-side snapshot and decoder state.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
1595
def _get_encoder(self):
    """Create, store and return an incremental encoder for the encoding."""
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = encoder_factory(self._errors)
    self._encoder = encoder
    return encoder

def _get_decoder(self):
    """Create, store and return an incremental decoder for the encoding.

    When universal-newline reading is enabled the decoder is wrapped in
    an IncrementalNewlineDecoder.
    """
    decoder_factory = codecs.getincrementaldecoder(self._encoding)
    decoder = decoder_factory(self._errors)
    if self._readuniversal:
        decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
    self._decoder = decoder
    return decoder
1608
1609 # The following three methods implement an ADT for _decoded_chars.
1610 # Text returned from the decoder is buffered here until the client
1611 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Set the _decoded_chars buffer and rewind its read offset to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0

def _get_decoded_chars(self, n=None):
    """Advance into the _decoded_chars buffer.

    Returns up to n characters from the current offset (everything that
    is left when n is None) and moves the offset past them.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken

def _rewind_decoded_chars(self, n):
    """Rewind the _decoded_chars buffer by n characters.

    Raises AssertionError when fewer than n characters have been used.
    """
    if self._decoded_chars_used < n:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
1632
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the underlying buffer.

    Returns True unless EOF was reached.  The decoded text replaces the
    previous contents of self._decoded_chars.  Raises ValueError when no
    decoder has been set up.
    """

    # The whole input chunk is handed to the decoder, although part of
    # it may stay buffered inside the decoder, not yet converted.

    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # For tell() we need a snapshot of a point in the file where the
        # decoder's input buffer is empty.  The decoder state tells us
        # that such a point existed len(dec_buffer) bytes ago, with
        # decoder state (b'', dec_flags).
        dec_buffer, dec_flags = self._decoder.getstate()

    # Read one chunk, decode it, and store the text in _decoded_chars.
    input_chunk = self.buffer.read1(self._CHUNK_SIZE)
    at_eof = not input_chunk
    decoded = self._decoder.decode(input_chunk, at_eof)
    self._set_decoded_chars(decoded)

    if self._telling:
        # From the snapshot point, the next input to decode is the
        # decoder's old buffer followed by the chunk just read.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not at_eof
1666
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Combine the tell() fields into a single integer cookie."""
    # Meaning of a tell() cookie: seek to position, set the decoder flags
    # to dec_flags, read bytes_to_feed bytes and feed them to the decoder
    # with need_eof as the EOF flag, then skip chars_to_skip characters of
    # the decoded result.  When every field but position is zero the
    # cookie is simply position.
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie

def _unpack_cookie(self, bigint):
    """Split a cookie into its fields.

    Returns (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip).
    """
    word = 1 << 64
    remaining = bigint
    fields = []
    for _ in range(4):
        remaining, low = divmod(remaining, word)
        fields.append(low)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    # Whatever is left above the four 64-bit fields is the EOF flag.
    return position, dec_flags, bytes_to_feed, remaining, chars_to_skip
1683
def tell(self):
    """Return a cookie describing the current logical stream position.

    Raises UnsupportedOperation if the stream is not seekable and
    IOError if telling is currently disabled or the position cannot be
    reconstructed.  When there is no decoder or no snapshot, the result
    is the buffer's own position.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Step back to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # Number of decoded characters consumed since the snapshot.
    remaining = self._decoded_chars_used
    if remaining == 0:
        # Still exactly at the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Replay the input through the decoder from the snapshot point until
    # it has produced enough characters; the decoder state is restored in
    # the finally clause.
    saved_state = decoder.getstate()
    try:
        # Record the initial start point.
        decoder.setstate((b'', dec_flags))
        safe_pos = position
        safe_flags, fed, decoded = dec_flags, 0, 0
        need_eof = 0

        # Feed one byte at a time, remembering the nearest "safe start
        # point" before the current location: a point where the decoder
        # has nothing buffered, so that seek() can start from there and
        # advance to this location.
        one_byte = bytearray(1)
        for one_byte[0] in next_input:
            fed += 1
            decoded += len(decoder.decode(one_byte))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and decoded <= remaining:
                # Empty decoder buffer: this is a safe start point.
                safe_pos += fed
                remaining -= decoded
                safe_flags, fed, decoded = dec_flags, 0, 0
            if decoded >= remaining:
                break
        else:
            # Not enough decoded data yet; signal EOF to get more.
            decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if decoded < remaining:
                raise IOError("can't reconstruct logical file position")

        # The cookie refers to the last safe start point.
        return self._pack_cookie(
            safe_pos, safe_flags, fed, need_eof, remaining)
    finally:
        decoder.setstate(saved_state)
1746
def truncate(self, pos=None):
    """Flush pending output and resize the underlying buffer.

    When *pos* is None the current tell() position is used.  Returns
    whatever the underlying buffer's truncate() returns.
    """
    self.flush()
    size = self.tell() if pos is None else pos
    return self.buffer.truncate(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752
def detach(self):
    """Flush, then disconnect and return the underlying buffer.

    Raises ValueError if the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1760
def seek(self, cookie, whence=0):
    """Reposition the stream and return the new position.

    *cookie* is a value previously returned by tell() (or 0).  *whence*
    selects the reference point:

    * 0 -- absolute: go to the safe start point stored in the cookie and
      replay the decoding it describes.  Returns *cookie*.
    * 1 -- relative to the current position; only a zero offset is
      accepted, and it is handled as an absolute seek to tell().
    * 2 -- relative to the end; only a zero offset is accepted.  Returns
      the byte position reported by the underlying buffer.

    Raises ValueError if the file is closed, *whence* is invalid or
    *cookie* is negative; UnsupportedOperation if the underlying stream
    is not seekable or a non-zero relative offset is requested; IOError
    if the position described by the cookie cannot be restored.
    """
    def _reset_encoder(position):
        # Reset the encoder (merely useful for proper BOM handling): at
        # position 0 it goes back to its initial state, anywhere else it
        # is put into state 0.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        # Keep the encoder state in line with the new position, exactly
        # as the absolute-seek path below does.
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, bring the encoder in line with the new position.
    _reset_encoder(cookie)
    return cookie
1832
def read(self, n=None):
    """Read and return at most *n* characters as a string.

    With *n* None or negative, everything up to EOF is read and decoded.
    Otherwise chunks are read until *n* characters are available or the
    stream is exhausted.  Raises TypeError when *n* has no __index__.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err

    if n < 0:
        # Drain pending text, then decode whatever the buffer still holds.
        pending = self._get_decoded_chars()
        raw = self.buffer.read()
        text = pending + decoder.decode(raw, final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return text

    # Pull chunks until n characters are collected or EOF is reached.
    text = self._get_decoded_chars(n)
    at_eof = False
    while not at_eof and len(text) < n:
        at_eof = not self._read_chunk()
        text += self._get_decoded_chars(n - len(text))
    return text
1857
def __next__(self):
    """Return the next line, raising StopIteration at end of file.

    Telling is switched off while iterating; once the stream is
    exhausted the snapshot is dropped and telling is re-enabled if the
    stream is seekable.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1866
def readline(self, limit=None):
    """Read and return one line, or '' at end of file.

    *limit*, when non-negative, caps the number of characters returned.
    Which sequences end a line depends on the newline mode: translated
    input looks for '\\n' only, universal mode accepts '\\r', '\\n' and
    '\\r\\n', and otherwise the configured self._readnl is searched for.

    Raises ValueError on a closed file and TypeError when *limit* is
    neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Take all pending decoded text; the surplus is handed back below.
    text = self._get_decoded_chars()
    scan_from = 0

    # Create the decoder on first use.
    if not self._decoder:
        self._get_decoder()

    end = None
    while True:
        if self._readtranslate:
            # Newlines were already translated: only '\n' can end a line.
            idx = text.find('\n', scan_from)
            if idx >= 0:
                end = idx + 1
                break
            scan_from = len(text)

        elif self._readuniversal:
            # Universal newlines: accept \r, \r\n or \n.  The decoder
            # ensures that \r\n is never split across two pieces.
            lf = text.find("\n", scan_from)
            cr = text.find("\r", scan_from)
            if lf == -1 and cr == -1:
                # Nothing found in the text seen so far.
                scan_from = len(text)
            else:
                if cr == -1 or (lf != -1 and lf < cr):
                    end = lf + 1      # a \n comes first
                elif lf == cr + 1:
                    end = cr + 2      # \r\n pair
                else:
                    end = cr + 1      # lone \r
                break

        else:
            # A specific, untranslated line terminator.
            idx = text.find(self._readnl)
            if idx >= 0:
                end = idx + len(self._readnl)
                break

        if 0 <= limit <= len(text):
            end = limit  # reached length limit
            break

        # No line ending seen yet: fetch chunks until one yields text.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if not self._decoded_chars:
            # End of file: return whatever was accumulated.
            self._set_decoded_chars('')
            self._snapshot = None
            return text
        text += self._get_decoded_chars()

    if 0 <= limit < end:
        end = limit  # don't exceed limit

    # Give back everything after the line ending that was found.
    self._rewind_decoded_chars(len(text) - end)
    return text[:end]
1954
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None if there is no decoder."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
1958
1959
class StringIO(TextIOWrapper):
    """Text I/O implementation backed by an in-memory buffer.

    *initial_value* supplies the initial contents of the stream.  The
    *newline* argument is passed on to TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: give universal newlines the same semantics as the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        initial_value = str(initial_value)
        # Store the initial text, then rewind to the start.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the buffer as a str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper reports the encoding in its repr; for StringIO
        # that is an implementation detail, so use the plain object repr.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Unsupported: detaching makes no sense for a StringIO."""
        self._unsupported("detach")