blob: 93626e25291f010bd3b4368a34f09ad298d31ef9 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18# open() uses st_blksize whenever we can
19DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
20
21# NOTE: Base classes defined here are registered with the "official" ABCs
22# defined in io.py. We don't use real inheritance though, because we don't
23# want to inherit the C implementations.
24
25
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    The extra characters_written value is stored on the instance under the
    attribute of the same name.
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the exception.

        errno and strerror are passed on to the IOError constructor.
        characters_written must be an int (default 0); a TypeError is
        raised otherwise.
        """
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
35
36
def open(file, mode="r", buffering=-1,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    inconsistent mode/buffering/encoding combinations.  If a failure
    occurs after the underlying file has been opened, the partially built
    stream is closed before the exception propagates.
    """
    # The one-element tuples keep %-formatting correct even if the offending
    # argument is itself a tuple.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))
    modes = set(mode)
    # Reject unknown mode characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    # The file is open from here on.  Track the outermost object built so
    # far so that it can be closed if any later step fails, instead of
    # leaking the raw stream.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            # Prefer the device block size when the platform reports one.
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Closing the outermost wrapper also closes what it wraps.
        result.close()
        raise
221
222
class DocDescriptor:
    """Descriptor that produces the docstring for builtins.open.

    Reading it yields the call signature of open() followed by a blank
    line and the docstring of open().
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
231
class OpenWrapper:
    """Wrapper for builtins.open

    Storing a plain function as a class variable would turn it into a
    bound method on attribute access (dbm.dumb stores open that way).
    Wrapping open in a class avoids that: "instantiating" the class simply
    calls open() and returns its result.

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the text above at runtime with the signature and docstring
    # of open().
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        stream = open(*args, **kwargs)
        return stream
244
245
# Normally this name and io.UnsupportedOperation refer to the very same
# class; a local stand-in is only defined when io does not provide one.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253
254
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Class-level default; close() shadows it with an instance attribute.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  The
        object is marked closed even when flush() raises, so a failed
        flush is reported once and not retried by later close() calls.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise
        UnsupportedOperation.  This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        This base implementation always raises UnsupportedOperation, for
        IO objects that do not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.  A limit
        of None or a negative limit means no limit; any other non-int
        limit raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in one call, without exceeding limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol.  Returns self; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty line."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(), in order.

        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
519
520
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        If n is None or negative, the result of readall() is returned.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        # Drop the unused tail of the scratch buffer.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns the accumulated bytes.  If nothing was accumulated, the
        result of the last read() call is returned as is, so that None
        (no data available) is not mistaken for b"" (EOF).
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # b"" or None
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
580
581
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        b may be any object accepting bytes in a slice assignment (such
        as a bytearray), or an array.array with one-byte items.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError:
            import array
            # array.array only accepts another array of the same typecode
            # in a slice assignment.  The conversion is only meaningful
            # when one item holds exactly one byte; otherwise let the
            # original TypeError propagate.
            if not isinstance(b, array.array) or b.itemsize != 1:
                raise
            b[:n] = array.array(b.typecode, data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
667
668
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Store raw as the underlying stream (exposed read-only as .raw)."""
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return its new position.

        Raises IOError if the raw stream reports a negative position.
        """
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Return the raw stream's position.

        Raises IOError if the raw stream reports a negative position.
        """
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream.  Raises ValueError if already closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream, unless detached or closed.

        The raw stream is closed even when flush() raises, so a failed
        flush does not leave the raw stream open; the flush error still
        propagates.
        """
        if self.raw is not None and not self.closed:
            try:
                # flush() may fail; the raw stream must be closed anyway.
                self.flush()
            finally:
                self.raw.close()

    def detach(self):
        """Flush, disconnect and return the raw stream.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        """Return the raw stream's seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Return the raw stream's readable()."""
        return self.raw.readable()

    def writable(self):
        """Return the raw stream's writable()."""
        return self.raw.writable()

    @property
    def raw(self):
        # The underlying raw stream, or None once detach() has been called.
        return self._raw

    @property
    def closed(self):
        # Mirrors the raw stream's closed state.
        return self.raw.closed

    @property
    def name(self):
        # Forwarded from the raw stream.
        return self.raw.name

    @property
    def mode(self):
        # Forwarded from the raw stream.
        return self.raw.mode

    def __getstate__(self):
        """Refuse pickling: always raises TypeError."""
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        """Return '<_pyio.Class name=...>', omitting name if unavailable."""
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Return the raw stream's isatty()."""
        return self.raw.isatty()
774
775
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the buffer, optionally pre-filled with initial_bytes.

        The stream position starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is None or negative, everything up to the end of the buffer
        is returned.  Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n=None):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the byte count.

        Raises ValueError if closed and TypeError if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence 0 sets the position to pos (which must not be negative),
        1 moves relative to the current position and 2 relative to the
        end of the buffer; for 1 and 2 the result is clamped at 0.

        Raises ValueError if closed, for a negative absolute position or
        an invalid whence, and TypeError if pos has no __index__.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Work with a real int so the stored position is always an int.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop the buffer contents from pos onwards and return pos.

        pos defaults to the current stream position.  The stream position
        itself is left unchanged.

        Raises ValueError if closed or if pos is negative, and TypeError
        if pos has no __index__.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
888
889
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if buffer_size
        is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock.

        Returns b"" at EOF, or None when the raw stream returned None and
        no data at all could be gathered.
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top the buffer up to buffer_size with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position.

        Raises ValueError if whence is not 0, 1 or 2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of data still sitting unread in the buffer.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1023
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel handed to warnings.warn() for the max_buffer_size
    # deprecation warning issued in __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw stream.

        max_buffer_size is deprecated and ignored apart from triggering a
        DeprecationWarning.  Raises IOError if raw is not writable and
        ValueError if buffer_size is not positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        The buffer is flushed to the raw stream whenever it exceeds
        buffer_size.  Raises ValueError if the stream is closed, TypeError
        if b is a str, and BlockingIOError (with characters_written set)
        when the raw stream cannot take the data right now.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock.

        Raises ValueError if the stream is closed, IOError if the raw
        stream reports an impossible byte count, and BlockingIOError
        (characters_written = bytes flushed by this call) if the raw
        stream would block.
        """
        import errno  # local: only needed on the would-block path

        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except BlockingIOError as e:
                n = e.characters_written
                del self._write_buf[:n]
                written += n
                raise BlockingIOError(e.errno, e.strerror, written)
            if n is None:
                # A raw write() returning None means "would block"; report
                # it as such instead of failing on the comparison below.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", written)
            if n > len(self._write_buf) or n < 0:
                raise IOError("write() returned incorrect number of bytes")
            del self._write_buf[:n]
            written += n

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not 0, 1 or 2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1119
1120
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  max_buffer_size is
        deprecated and only triggers a DeprecationWarning.  Raises IOError
        if reader is not readable or writer is not writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; n of None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both halves.

        The reader is closed even when closing the writer raises (for
        example because its final flush failed), so it is never leaked.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either half reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the writer half."""
        return self.writer.closed
1191
1192
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # stacklevel used by BufferedWriter.__init__ for its deprecation
    # warning; BufferedWriter itself uses 2, and here that __init__ is
    # reached through one additional call (this class's __init__).
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialise both the reader and the writer half on the same raw.

        raw._checkSeekable() is called first, so a raw stream that fails
        that check is rejected before any state is set up.
        """
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the new position.  Raises ValueError for a whence outside
        0..2 and IOError if the raw seek reports a negative position.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        """Return the logical position.

        Uses the writer's computation while unflushed writes are pending,
        the reader's otherwise.
        """
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        # BufferedWriter.truncate flushes pending writes before
        # truncating the raw stream.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read like BufferedReader.read()."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                # Move the raw stream back to the logical position before
                # forgetting the buffered (not yet consumed) bytes.
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1264
1265
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers the character- and line-oriented stream interface.  Every
    operation defined here reports itself as unsupported; the properties
    all return None.  There is no readinto method because Python's
    character strings are immutable.  There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from stream.

        Reads until n characters are available or EOF is hit; a negative
        or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or to EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        Once detached, the TextIO object is in an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the encoding in use; None here, subclasses override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far (reading only).

        None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override.
        """
        return None


# Make the pure-Python class a virtual subclass of the official ABC.
io.TextIOBase.register(TextIOBase)
1330
1331
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder (or none at all, in which case the
    input is used as-is) and, when translate is true, turns \r\n and \r
    into \n.  The kinds of newline encountered are recorded.  With
    translate=False it still guarantees that a \r\n pair is delivered in
    one piece.
    """

    # Bits accumulated in self.seennl, one per newline flavour observed.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input and return text, translated if so configured."""
        wrapped = self.decoder
        # decode input (with the eventual \r from a previous pass)
        if wrapped is None:
            text = input
        else:
            text = wrapped.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that a
        # following \n is always seen together with it.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = self.seennl
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl = seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffered_input, flags); bit 0 of flags is the pending \\r."""
        wrapped = self.decoder
        if wrapped is None:
            pending, flags = b"", 0
        else:
            pending, flags = wrapped.getstate()
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending, flags

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending, flags = state
        self.pendingcr = bool(flags & 1)
        wrapped = self.decoder
        if wrapped is not None:
            wrapped.setstate((pending, flags >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        wrapped = self.decoder
        if wrapped is not None:
            wrapped.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1415
1416
1417class TextIOWrapper(TextIOBase):
1418
1419 r"""Character and line based layer over a BufferedIOBase object, buffer.
1420
1421 encoding gives the name of the encoding that the stream will be
1422 decoded or encoded with. It defaults to locale.getpreferredencoding.
1423
1424 errors determines the strictness of encoding and decoding (see the
1425 codecs.register) and defaults to "strict".
1426
1427 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1428 handling of line endings. If it is None, universal newlines is
1429 enabled. With this enabled, on input, the lines endings '\n', '\r',
1430 or '\r\n' are translated to '\n' before being returned to the
1431 caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
1433 legal values, that newline becomes the newline when the file is read
1434 and it is returned untranslated. On output, '\n' is converted to the
1435 newline.
1436
1437 If line_buffering is True, a call to flush is implied when a call to
1438 write contains a newline character.
1439 """
1440
1441 _CHUNK_SIZE = 2048
1442
1443 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1444 line_buffering=False):
1445 if newline is not None and not isinstance(newline, str):
1446 raise TypeError("illegal newline type: %r" % (type(newline),))
1447 if newline not in (None, "", "\n", "\r", "\r\n"):
1448 raise ValueError("illegal newline value: %r" % (newline,))
1449 if encoding is None:
1450 try:
1451 encoding = os.device_encoding(buffer.fileno())
1452 except (AttributeError, UnsupportedOperation):
1453 pass
1454 if encoding is None:
1455 try:
1456 import locale
1457 except ImportError:
1458 # Importing locale may fail if Python is being built
1459 encoding = "ascii"
1460 else:
1461 encoding = locale.getpreferredencoding()
1462
1463 if not isinstance(encoding, str):
1464 raise ValueError("invalid encoding: %r" % encoding)
1465
1466 if errors is None:
1467 errors = "strict"
1468 else:
1469 if not isinstance(errors, str):
1470 raise ValueError("invalid errors: %r" % errors)
1471
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001472 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001473 self._line_buffering = line_buffering
1474 self._encoding = encoding
1475 self._errors = errors
1476 self._readuniversal = not newline
1477 self._readtranslate = newline is None
1478 self._readnl = newline
1479 self._writetranslate = newline != ''
1480 self._writenl = newline or os.linesep
1481 self._encoder = None
1482 self._decoder = None
1483 self._decoded_chars = '' # buffer for text returned from decoder
1484 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1485 self._snapshot = None # info for reconstructing decoder state
1486 self._seekable = self._telling = self.buffer.seekable()
1487
Antoine Pitroue4501852009-05-14 18:55:55 +00001488 if self._seekable and self.writable():
1489 position = self.buffer.tell()
1490 if position != 0:
1491 try:
1492 self._get_encoder().setstate(0)
1493 except LookupError:
1494 # Sometimes the encoder doesn't exist
1495 pass
1496
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001497 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1498 # where dec_flags is the second (integer) item of the decoder state
1499 # and next_input is the chunk of input bytes that comes next after the
1500 # snapshot point. We use this to reconstruct decoder states in tell().
1501
1502 # Naming convention:
1503 # - "bytes_..." for integer variables that count input bytes
1504 # - "chars_..." for integer variables that count decoded characters
1505
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001506 def __repr__(self):
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001507 result = "<_pyio.TextIOWrapper"
Antoine Pitrou716c4442009-05-23 19:04:03 +00001508 try:
1509 name = self.name
1510 except AttributeError:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001511 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001512 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001513 result += " name={0!r}".format(name)
1514 try:
1515 mode = self.mode
1516 except AttributeError:
1517 pass
1518 else:
1519 result += " mode={0!r}".format(mode)
1520 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001521
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001522 @property
1523 def encoding(self):
1524 return self._encoding
1525
1526 @property
1527 def errors(self):
1528 return self._errors
1529
1530 @property
1531 def line_buffering(self):
1532 return self._line_buffering
1533
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001534 @property
1535 def buffer(self):
1536 return self._buffer
1537
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538 def seekable(self):
1539 return self._seekable
1540
1541 def readable(self):
1542 return self.buffer.readable()
1543
1544 def writable(self):
1545 return self.buffer.writable()
1546
1547 def flush(self):
1548 self.buffer.flush()
1549 self._telling = self._seekable
1550
1551 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00001552 if self.buffer is not None and not self.closed:
1553 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001554 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001555
1556 @property
1557 def closed(self):
1558 return self.buffer.closed
1559
1560 @property
1561 def name(self):
1562 return self.buffer.name
1563
1564 def fileno(self):
1565 return self.buffer.fileno()
1566
1567 def isatty(self):
1568 return self.buffer.isatty()
1569
1570 def write(self, s: str):
1571 if self.closed:
1572 raise ValueError("write to closed file")
1573 if not isinstance(s, str):
1574 raise TypeError("can't write %s to text stream" %
1575 s.__class__.__name__)
1576 length = len(s)
1577 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1578 if haslf and self._writetranslate and self._writenl != "\n":
1579 s = s.replace("\n", self._writenl)
1580 encoder = self._encoder or self._get_encoder()
1581 # XXX What if we were just reading?
1582 b = encoder.encode(s)
1583 self.buffer.write(b)
1584 if self._line_buffering and (haslf or "\r" in s):
1585 self.flush()
1586 self._snapshot = None
1587 if self._decoder:
1588 self._decoder.reset()
1589 return length
1590
1591 def _get_encoder(self):
1592 make_encoder = codecs.getincrementalencoder(self._encoding)
1593 self._encoder = make_encoder(self._errors)
1594 return self._encoder
1595
1596 def _get_decoder(self):
1597 make_decoder = codecs.getincrementaldecoder(self._encoding)
1598 decoder = make_decoder(self._errors)
1599 if self._readuniversal:
1600 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1601 self._decoder = decoder
1602 return decoder
1603
1604 # The following three methods implement an ADT for _decoded_chars.
1605 # Text returned from the decoder is buffered here until the client
1606 # requests it by calling our read() or readline() method.
1607 def _set_decoded_chars(self, chars):
1608 """Set the _decoded_chars buffer."""
1609 self._decoded_chars = chars
1610 self._decoded_chars_used = 0
1611
1612 def _get_decoded_chars(self, n=None):
1613 """Advance into the _decoded_chars buffer."""
1614 offset = self._decoded_chars_used
1615 if n is None:
1616 chars = self._decoded_chars[offset:]
1617 else:
1618 chars = self._decoded_chars[offset:offset + n]
1619 self._decoded_chars_used += len(chars)
1620 return chars
1621
1622 def _rewind_decoded_chars(self, n):
1623 """Rewind the _decoded_chars buffer."""
1624 if self._decoded_chars_used < n:
1625 raise AssertionError("rewind decoded_chars out of bounds")
1626 self._decoded_chars_used -= n
1627
1628 def _read_chunk(self):
1629 """
1630 Read and decode the next chunk of data from the BufferedReader.
1631 """
1632
1633 # The return value is True unless EOF was reached. The decoded
1634 # string is placed in self._decoded_chars (replacing its previous
1635 # value). The entire input chunk is sent to the decoder, though
1636 # some of it may remain buffered in the decoder, yet to be
1637 # converted.
1638
1639 if self._decoder is None:
1640 raise ValueError("no decoder")
1641
1642 if self._telling:
1643 # To prepare for tell(), we need to snapshot a point in the
1644 # file where the decoder's input buffer is empty.
1645
1646 dec_buffer, dec_flags = self._decoder.getstate()
1647 # Given this, we know there was a valid snapshot point
1648 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1649
1650 # Read a chunk, decode it, and put the result in self._decoded_chars.
1651 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1652 eof = not input_chunk
1653 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1654
1655 if self._telling:
1656 # At the snapshot point, len(dec_buffer) bytes before the read,
1657 # the next input to be decoded is dec_buffer + input_chunk.
1658 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1659
1660 return not eof
1661
1662 def _pack_cookie(self, position, dec_flags=0,
1663 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1664 # The meaning of a tell() cookie is: seek to position, set the
1665 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1666 # into the decoder with need_eof as the EOF flag, then skip
1667 # chars_to_skip characters of the decoded result. For most simple
1668 # decoders, tell() will often just give a byte offset in the file.
1669 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1670 (chars_to_skip<<192) | bool(need_eof)<<256)
1671
1672 def _unpack_cookie(self, bigint):
1673 rest, position = divmod(bigint, 1<<64)
1674 rest, dec_flags = divmod(rest, 1<<64)
1675 rest, bytes_to_feed = divmod(rest, 1<<64)
1676 need_eof, chars_to_skip = divmod(rest, 1<<64)
1677 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1678
    def tell(self):
        """Return the current position as an integer cookie.

        The result is either the plain position reported by the buffer
        (when no decoder or snapshot is in play) or a value built by
        _pack_cookie().  Pending output is flushed first.

        Raises UnsupportedOperation if the stream is not seekable and
        IOError if telling is currently disabled or the position cannot
        be reconstructed from the snapshot.
        """
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        # The decoder is shared with the read path, so its state is saved
        # here and restored in the finally clause below.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            # The loop target is next_byte[0]: each input byte is stored
            # into the one-byte bytearray, which is then fed to decode().
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)
1741
1742 def truncate(self, pos=None):
1743 self.flush()
1744 if pos is None:
1745 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001746 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001748 def detach(self):
1749 if self.buffer is None:
1750 raise ValueError("buffer is already detached")
1751 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001752 buffer = self._buffer
1753 self._buffer = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001754 return buffer
1755
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756 def seek(self, cookie, whence=0):
1757 if self.closed:
1758 raise ValueError("tell on closed file")
1759 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001760 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 if whence == 1: # seek relative to current position
1762 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001763 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 # Seeking to the current position should attempt to
1765 # sync the underlying buffer with the current position.
1766 whence = 0
1767 cookie = self.tell()
1768 if whence == 2: # seek relative to end of file
1769 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001770 raise UnsupportedOperation("can't do nonzero end-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001771 self.flush()
1772 position = self.buffer.seek(0, 2)
1773 self._set_decoded_chars('')
1774 self._snapshot = None
1775 if self._decoder:
1776 self._decoder.reset()
1777 return position
1778 if whence != 0:
1779 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1780 (whence,))
1781 if cookie < 0:
1782 raise ValueError("negative seek position %r" % (cookie,))
1783 self.flush()
1784
1785 # The strategy of seek() is to go back to the safe start point
1786 # and replay the effect of read(chars_to_skip) from there.
1787 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1788 self._unpack_cookie(cookie)
1789
1790 # Seek back to the safe start point.
1791 self.buffer.seek(start_pos)
1792 self._set_decoded_chars('')
1793 self._snapshot = None
1794
1795 # Restore the decoder to its state from the safe start point.
Benjamin Peterson9363a652009-03-05 00:42:09 +00001796 if cookie == 0 and self._decoder:
1797 self._decoder.reset()
1798 elif self._decoder or dec_flags or chars_to_skip:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001799 self._decoder = self._decoder or self._get_decoder()
1800 self._decoder.setstate((b'', dec_flags))
1801 self._snapshot = (dec_flags, b'')
1802
1803 if chars_to_skip:
1804 # Just like _read_chunk, feed the decoder and save a snapshot.
1805 input_chunk = self.buffer.read(bytes_to_feed)
1806 self._set_decoded_chars(
1807 self._decoder.decode(input_chunk, need_eof))
1808 self._snapshot = (dec_flags, input_chunk)
1809
1810 # Skip chars_to_skip of the decoded characters.
1811 if len(self._decoded_chars) < chars_to_skip:
1812 raise IOError("can't restore logical file position")
1813 self._decoded_chars_used = chars_to_skip
1814
Antoine Pitroue4501852009-05-14 18:55:55 +00001815 # Finally, reset the encoder (merely useful for proper BOM handling)
1816 try:
1817 encoder = self._encoder or self._get_encoder()
1818 except LookupError:
1819 # Sometimes the encoder doesn't exist
1820 pass
1821 else:
1822 if cookie != 0:
1823 encoder.setstate(0)
1824 else:
1825 encoder.reset()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001826 return cookie
1827
1828 def read(self, n=None):
Benjamin Petersona1b49012009-03-31 23:11:32 +00001829 self._checkReadable()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001830 if n is None:
1831 n = -1
1832 decoder = self._decoder or self._get_decoder()
Florent Xiclunab14930c2010-03-13 15:26:44 +00001833 try:
1834 n.__index__
1835 except AttributeError as err:
1836 raise TypeError("an integer is required") from err
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837 if n < 0:
1838 # Read everything.
1839 result = (self._get_decoded_chars() +
1840 decoder.decode(self.buffer.read(), final=True))
1841 self._set_decoded_chars('')
1842 self._snapshot = None
1843 return result
1844 else:
1845 # Keep reading chunks until we have n characters to return.
1846 eof = False
1847 result = self._get_decoded_chars(n)
1848 while len(result) < n and not eof:
1849 eof = not self._read_chunk()
1850 result += self._get_decoded_chars(n - len(result))
1851 return result
1852
1853 def __next__(self):
1854 self._telling = False
1855 line = self.readline()
1856 if not line:
1857 self._snapshot = None
1858 self._telling = self._seekable
1859 raise StopIteration
1860 return line
1861
1862 def readline(self, limit=None):
1863 if self.closed:
1864 raise ValueError("read from closed file")
1865 if limit is None:
1866 limit = -1
Benjamin Petersonb01138a2009-04-24 22:59:52 +00001867 elif not isinstance(limit, int):
1868 raise TypeError("limit must be an integer")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869
1870 # Grab all the decoded text (we will rewind any extra bits later).
1871 line = self._get_decoded_chars()
1872
1873 start = 0
1874 # Make the decoder if it doesn't already exist.
1875 if not self._decoder:
1876 self._get_decoder()
1877
1878 pos = endpos = None
1879 while True:
1880 if self._readtranslate:
1881 # Newlines are already translated, only search for \n
1882 pos = line.find('\n', start)
1883 if pos >= 0:
1884 endpos = pos + 1
1885 break
1886 else:
1887 start = len(line)
1888
1889 elif self._readuniversal:
1890 # Universal newline search. Find any of \r, \r\n, \n
1891 # The decoder ensures that \r\n are not split in two pieces
1892
1893 # In C we'd look for these in parallel of course.
1894 nlpos = line.find("\n", start)
1895 crpos = line.find("\r", start)
1896 if crpos == -1:
1897 if nlpos == -1:
1898 # Nothing found
1899 start = len(line)
1900 else:
1901 # Found \n
1902 endpos = nlpos + 1
1903 break
1904 elif nlpos == -1:
1905 # Found lone \r
1906 endpos = crpos + 1
1907 break
1908 elif nlpos < crpos:
1909 # Found \n
1910 endpos = nlpos + 1
1911 break
1912 elif nlpos == crpos + 1:
1913 # Found \r\n
1914 endpos = crpos + 2
1915 break
1916 else:
1917 # Found \r
1918 endpos = crpos + 1
1919 break
1920 else:
1921 # non-universal
1922 pos = line.find(self._readnl)
1923 if pos >= 0:
1924 endpos = pos + len(self._readnl)
1925 break
1926
1927 if limit >= 0 and len(line) >= limit:
1928 endpos = limit # reached length limit
1929 break
1930
1931 # No line ending seen yet - get more data'
1932 while self._read_chunk():
1933 if self._decoded_chars:
1934 break
1935 if self._decoded_chars:
1936 line += self._get_decoded_chars()
1937 else:
1938 # end of file
1939 self._set_decoded_chars('')
1940 self._snapshot = None
1941 return line
1942
1943 if limit >= 0 and endpos > limit:
1944 endpos = limit # don't exceed limit
1945
1946 # Rewind _decoded_chars to just after the line ending we found.
1947 self._rewind_decoded_chars(len(line) - endpos)
1948 return line[:endpos]
1949
1950 @property
1951 def newlines(self):
1952 return self._decoder.newlines if self._decoder else None
1953
1954
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO and preload initial_value.

        Raises TypeError if initial_value is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="strict",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            # Leave the stream positioned at the start of the preloaded text.
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str.

        The bytes are decoded through the stream's own decoder, so the
        result follows the same newline handling as read().  The
        decoder's state is saved and restored around the call, so reads
        in progress are not disturbed.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler is an implementation detail."""
        return None

    @property
    def encoding(self):
        """Always None: the encoding is an implementation detail."""
        return None

    def detach(self):
        """Unsupported: reports the operation through _unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")