blob: 3bd35d2e924e2c1594783ed210e9965856250383 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18# open() uses st_blksize whenever we can
19DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
20
21# NOTE: Base classes defined here are registered with the "official" ABCs
22# defined in io.py. We don't use real inheritance though, because we don't
23# want to inherit the C implementations.
24
Antoine Pitrou6b4883d2011-10-12 02:54:14 +020025# Rebind for compatibility
26BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000027
28
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for an
    invalid mode, an invalid buffering value, or text-only arguments passed
    in binary mode. If anything fails after the raw file has been opened,
    the partially built stream is closed before the exception propagates.
    """
    # ---- Argument type validation -------------------------------------
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # ---- Mode parsing ---------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # ---- Raw layer ------------------------------------------------------
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)

    # `result` always names the outermost stream built so far, so that a
    # failure in any later step can close everything that was opened.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            # Prefer the block size reported for the file, when available.
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # ---- Buffered layer ----------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # ---- Text layer --------------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Do not leak the already-opened file when a later step fails.
        result.close()
        raise
212
213
class DocDescriptor:
    """Descriptor producing the docstring exposed for builtins.open.

    The value is the call signature of open(), a blank line, and then the
    docstring of open() itself, computed on every attribute access.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)")
        return "\n\n".join((signature, open.__doc__))
222
class OpenWrapper:
    # Wrapper for builtins.open
    #
    # Trick so that open won't become a bound method when stored
    # as a class variable (as dbm.dumb does).
    #
    # See initstdio() in Python/pythonrun.c.
    #
    # The class docstring is supplied dynamically by the descriptor below,
    # so the explanation above lives in comments rather than in a string
    # literal that would be overwritten anyway.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Instantiating the class simply forwards to open(); the object
        # returned is whatever stream open() produced, not an OpenWrapper.
        stream = open(*args, **kwargs)
        return stream
235
236
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000237# In normal operation, both `UnsupportedOperation`s should be bound to the
238# same object.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    # The io module does not export the exception: define a local stand-in
    # with the same base classes.
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000244
245
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations.

        name is the method name reported in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return an int indicating the new absolute position.

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell(). Return
        the new size.

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed. If
        flush() raises, the object is still marked as closed before the
        exception propagates.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark as closed even when flush() fails; otherwise the
                # object stays half-open and every later close() (including
                # the one from __del__) would retry the failing flush.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable.

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable.

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable.

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed.

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, look ahead so that a whole line (or
            # everything buffered) can be requested in a single read().
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # Without peek(), fall back to reading one byte at a time.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as a line iterator; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines using write().

        No line separators are added. Raises ValueError if the stream is
        closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
511
512
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a subclass that wants read()
    # support only has to provide readinto() as its primitive. In general,
    # readinto() can be more efficient than read().

    # (Offering the reverse as well -- readinto() written in terms of
    # read() -- is deliberately avoided: a subclass implementing neither
    # would recurse endlessly.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        A value of None or a negative n reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None or n < 0:
            return self.readall()
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            return None
        # Drop the unused tail of the scratch buffer.
        del buf[count:]
        return bytes(buf)

    def readall(self):
        """Read until EOF by calling read() repeatedly.

        Returns the accumulated bytes; when nothing was read at all, the
        result of the first read() (b'' or None) is returned unchanged.
        """
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # `data` is b'' or None here.
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation is a placeholder that reports the
        operation as unsupported.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation is a placeholder that reports the
        operation as unsupported.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
576
577
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation is a placeholder that reports the
        operation as unsupported.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.

        This base implementation is a placeholder that reports the
        operation as unsupported.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into b, a pre-allocated writable bytes-like object.

        Up to as many bytes as b can hold are read. Like read(), this may
        issue multiple reads to the underlying raw stream, unless the
        latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # View the target as a flat sequence of bytes. This makes the
        # method work with anything exporting a writable buffer (bytearray,
        # array.array of any typecode, memoryview, ...) and makes len(b)
        # the capacity in bytes rather than in elements.
        if not isinstance(b, memoryview):
            b = memoryview(b)
        b = b.cast('B')

        data = self.read(len(b))
        n = len(data)
        b[:n] = data
        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation is a placeholder that reports the
        operation as unsupported.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        This base implementation is a placeholder that reports the
        operation as unsupported.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
665
666
667class _BufferedIOMixin(BufferedIOBase):
668
669 """A mixin implementation of BufferedIOBase with an underlying raw stream.
670
671 This passes most requests on to the underlying raw stream. It
672 does *not* provide implementations of read(), readinto() or
673 write().
674 """
675
676 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000677 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678
679 ### Positioning ###
680
681 def seek(self, pos, whence=0):
682 new_position = self.raw.seek(pos, whence)
683 if new_position < 0:
684 raise IOError("seek() returned an invalid position")
685 return new_position
686
687 def tell(self):
688 pos = self.raw.tell()
689 if pos < 0:
690 raise IOError("tell() returned an invalid position")
691 return pos
692
693 def truncate(self, pos=None):
694 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
695 # and a flush may be necessary to synch both views of the current
696 # file state.
697 self.flush()
698
699 if pos is None:
700 pos = self.tell()
701 # XXX: Should seek() be used, instead of passing the position
702 # XXX directly to truncate?
703 return self.raw.truncate(pos)
704
705 ### Flush and close ###
706
707 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000708 if self.closed:
709 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710 self.raw.flush()
711
712 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000713 if self.raw is not None and not self.closed:
714 self.flush()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715 self.raw.close()
716
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000717 def detach(self):
718 if self.raw is None:
719 raise ValueError("raw stream already detached")
720 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000721 raw = self._raw
722 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000723 return raw
724
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725 ### Inquiries ###
726
727 def seekable(self):
728 return self.raw.seekable()
729
730 def readable(self):
731 return self.raw.readable()
732
733 def writable(self):
734 return self.raw.writable()
735
736 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000737 def raw(self):
738 return self._raw
739
740 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000741 def closed(self):
742 return self.raw.closed
743
744 @property
745 def name(self):
746 return self.raw.name
747
748 @property
749 def mode(self):
750 return self.raw.mode
751
Antoine Pitrou243757e2010-11-05 21:15:39 +0000752 def __getstate__(self):
753 raise TypeError("can not serialize a '{0}' object"
754 .format(self.__class__.__name__))
755
Antoine Pitrou716c4442009-05-23 19:04:03 +0000756 def __repr__(self):
757 clsname = self.__class__.__name__
758 try:
759 name = self.name
760 except AttributeError:
761 return "<_pyio.{0}>".format(clsname)
762 else:
763 return "<_pyio.{0} name={1!r}>".format(clsname, name)
764
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000765 ### Lower-level APIs ###
766
767 def fileno(self):
768 return self.raw.fileno()
769
770 def isatty(self):
771 return self.raw.isatty()
772
773
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The contents live in a bytearray (self._buffer) and the stream
    position in self._pos.  The position may point past the end of the
    buffer; write() then zero-fills the gap.
    """

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict for pickling.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        A missing, None or negative n reads up to the end of the buffer.
        Returns b"" when the position is at or past the end.  Raises
        ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written and advances the position by
        that amount.  If the position is past the end of the buffer the
        gap is filled with null bytes first.  Raises ValueError if the
        stream is closed and TypeError if b is a str or does not support
        the buffer protocol.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # len(b) counts items, not bytes, for buffers with multi-byte
        # items (e.g. array('H')); nbytes is the size actually stored.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence 0 is absolute (pos must not be negative), 1 is relative
        to the current position and 2 relative to the end of the buffer;
        relative results are clamped at 0.  Raises ValueError for a
        closed stream, a negative absolute position or an unknown
        whence, and TypeError when pos has no __index__.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Convert to a real int so _pos never holds an index-like object.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream position.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos onwards and return pos.

        pos defaults to the current position; the stream position itself
        is not moved.  Raises ValueError for a closed stream or a
        negative pos, and TypeError when pos has no __index__.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            # Convert to a real int before comparing and slicing.
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True: the in-memory buffer can be read."""
        return True

    def writable(self):
        """Always True: the in-memory buffer can be written."""
        return True

    def seekable(self):
        """Always True: the in-memory buffer supports seeking."""
        return True
886
887
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    Read buffer on top of a readable raw stream.

    Data fetched from the raw stream but not yet handed to the caller is
    kept in self._read_buf, with self._read_pos marking how much of it
    has already been consumed.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the readable raw stream in a new buffered reader.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read data."""
        self._read_buf, self._read_pos = b"", 0

    def read(self, n=None):
        """Read n bytes while holding the read lock.

        Returns exactly n bytes unless the raw stream hits EOF or the
        call would block in non-blocking mode.  With n None or -1, reads
        until EOF or until the raw read would block.  Raises ValueError
        for n below -1.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock."""
        no_data = b""            # returned when nothing at all was read
        exhausted = (b"", None)  # raw results meaning EOF / would block
        buffered = self._read_buf
        start = self._read_pos

        # Unbounded read: take what is buffered plus the rest of the stream.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                rest = self.raw.readall()
                if rest is None:
                    return buffered[start:] or None
                return buffered[start:] + rest
            pieces = [buffered[start:]]  # Leave out the consumed bytes.
            while True:
                # Keep reading until EOF or until read() would block.
                try:
                    piece = self.raw.read()
                except InterruptedError:
                    continue
                if piece in exhausted:
                    no_data = piece
                    break
                pieces.append(piece)
            return b"".join(pieces) or no_data

        # Bounded read: hand back at most n bytes.
        available = len(buffered) - start
        if n <= available:
            # Everything requested is already buffered.
            self._read_pos += n
            return buffered[start:start + n]
        # Otherwise pull from the raw stream until enough is available,
        # EOF is reached, or the raw read would block.
        pieces = [buffered[start:]]
        request = max(self.buffer_size, n)
        while available < n:
            try:
                piece = self.raw.read(request)
            except InterruptedError:
                continue
            if piece in exhausted:
                no_data = piece
                break
            available += len(piece)
            pieces.append(piece)
        # n exceeds available only after EOF or a would-block result.
        n = min(n, available)
        data = b"".join(pieces)
        self._read_buf = data[n:]  # Keep the surplus for the next call.
        self._read_pos = 0
        return data[:n] if data else no_data

    def peek(self, n=0):
        """Return buffered bytes without advancing the position.

        n is the desired minimum number of bytes; at most one raw read
        is performed to satisfy it.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        wanted = min(n, self.buffer_size)
        buffered = len(self._read_buf) - self._read_pos
        if buffered < wanted or buffered <= 0:
            shortfall = self.buffer_size - buffered
            while True:
                try:
                    fresh = self.raw.read(shortfall)
                except InterruptedError:
                    continue
                break
            if fresh:
                self._read_buf = self._read_buf[self._read_pos:] + fresh
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Read up to n bytes, with at most one raw read() call.

        If at least one byte is buffered only buffered bytes are
        returned; otherwise a single raw read is made.  Raises
        ValueError for negative n.
        """
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            ready = len(self._read_buf) - self._read_pos
            return self._read_unlocked(min(n, ready))

    def tell(self):
        """Return the raw position minus the unread buffered bytes."""
        raw_position = _BufferedIOMixin.tell(self)
        return raw_position - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the position.

        For a relative seek (whence 1) the offset is first corrected for
        the read-ahead still in the buffer.  Raises ValueError when
        whence is outside 0..2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                unread = len(self._read_buf) - self._read_pos
                pos -= unread
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1038
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.

    Pending data is kept in the bytearray self._write_buf; all buffer
    manipulation happens while holding self._write_lock.
    """

    # stacklevel passed to warnings.warn() for the max_buffer_size
    # deprecation warning issued from __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream in a new buffered writer.

        max_buffer_size is deprecated: passing anything but None only
        emits a DeprecationWarning.  Raises IOError if raw is not
        writable and ValueError if buffer_size is not positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer, flushing when it overflows.

        Returns the number of bytes accepted.  Raises ValueError on a
        closed stream, TypeError for str input, and BlockingIOError
        (with characters_written set to the accepted byte count) when
        the raw stream would block and the data does not fit into
        buffer_size.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        Returns the result of raw.truncate().
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold the write lock.

        Data accepted by the raw stream is removed from the buffer as it
        goes.  Raises ValueError on a closed stream, IOError if the raw
        stream reports an impossible byte count, and BlockingIOError
        (characters_written = bytes flushed by this call) when the raw
        stream would block, whether it signals that by raising
        BlockingIOError or by returning None from write().
        """
        import errno  # local import: only this method needs it

        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                try:
                    n = self.raw.write(self._write_buf)
                except InterruptedError:
                    continue
                if n is None:
                    # A non-blocking raw stream returns None when not a
                    # single byte could be written; report it like a
                    # blocking error instead of failing on the
                    # comparison below.
                    raise BlockingIOError(
                        errno.EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still buffered."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush, then seek the raw stream; return the new position.

        Raises ValueError when whence is outside 0..2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1137
1138
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap reader in a BufferedReader and writer in a BufferedWriter.

        max_buffer_size is deprecated: passing anything but None only
        emits a DeprecationWarning.  Raises IOError if reader is not
        readable or writer is not writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything (-1)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Delegate to the reader's readable()."""
        return self.reader.readable()

    def writable(self):
        """Delegate to the writer's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises, so a
        failure on the write side cannot leak the read side; the
        writer's error still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either the reader or the writer is a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer side."""
        return self.writer.closed
1209
1210
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over one seekable raw stream.

    Combines BufferedWriter and BufferedReader on the same raw object.
    Pending writes are flushed before reading, and read-ahead is undone
    before writing or seeking.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel for the max_buffer_size deprecation warning, which is
    # issued by BufferedWriter.__init__ when called from our __init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Check that raw is seekable, then set up both buffer halves."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, seek; return the new position.

        Raises ValueError when whence is outside 0..2 and IOError when
        the raw seek reports a negative position.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Step the raw stream back over the unread read-ahead.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # Seek the raw stream before emptying the read buffer, so a
        # failing raw seek does not throw buffered data away.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        """Return the logical position, counting pending writes if any."""
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        # BufferedWriter.truncate flushes before truncating.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read like BufferedReader.read."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write like BufferedWriter.write."""
        if self._read_buf:
            # Move the raw stream back to the logical position and drop
            # the read buffer before buffering new output.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1282
1283
class TextIOBase(IOBase):

    """Base class for text I/O.

    Defines the character- and line-oriented stream interface.  The
    operations below all report "unsupported" until a subclass
    overrides them.  There is no readinto method because Python's
    character strings are immutable, and no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream, where n is an int.

        Reads from the underlying buffer until n characters are
        available or EOF is hit.  If n is negative or omitted, reads
        until EOF.

        Returns a string.
        """
        operation = "read"
        self._unsupported(operation)

    def write(self, s):
        """Write the string s to the stream and return an int."""
        operation = "write"
        self._unsupported(operation)

    def truncate(self, pos=None):
        """Truncate the size to pos, where pos is an int."""
        operation = "truncate"
        self._unsupported(operation)

    def readline(self):
        """Read until a newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        operation = "readline"
        self._unsupported(operation)

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in
        an unusable state.
        """
        operation = "detach"
        self._unsupported(operation)

    @property
    def encoding(self):
        """Name of the stream encoding; None here, subclasses override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; None here, subclasses override.

        Only line endings translated during reading are considered.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)
1350
1351
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for universal-newlines reading.

    Wraps another incremental decoder (or none, for already-decoded
    text) and records which of \n, \r and \r\n were seen.  With
    translate true, \r\n and \r are turned into \n; with translate
    false the text is unchanged but a \r\n pair is never split across
    two results.
    """

    # Bit flags accumulated in self.seennl, one per newline style.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bitmask of newline styles seen so far
        self.pendingcr = False   # True while a trailing \r is held back

    def decode(self, input, final=False):
        """Decode input and return the text, applying newline handling."""
        # Decode first; a \r held back by the previous call is re-added.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one piece.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newline styles occur in this piece of text.
        crlf_count = text.count('\r\n')
        cr_count = text.count('\r') - crlf_count
        lf_count = text.count('\n') - crlf_count
        if lf_count:
            self.seennl |= self._LF
        if cr_count:
            self.seennl |= self._CR
        if crlf_count:
            self.seennl |= self._CRLF

        if self.translate:
            if crlf_count:
                text = text.replace("\r\n", "\n")
            if cr_count:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r flag.

        The wrapped decoder's own flags occupy the remaining bits.
        """
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline styles seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bitmask (_LF | _CR | _CRLF).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1435
1436
1437class TextIOWrapper(TextIOBase):
1438
1439 r"""Character and line based layer over a BufferedIOBase object, buffer.
1440
1441 encoding gives the name of the encoding that the stream will be
1442 decoded or encoded with. It defaults to locale.getpreferredencoding.
1443
1444 errors determines the strictness of encoding and decoding (see the
1445 codecs.register) and defaults to "strict".
1446
1447 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1448 handling of line endings. If it is None, universal newlines is
1449 enabled. With this enabled, on input, the line endings '\n', '\r',
1450 or '\r\n' are translated to '\n' before being returned to the
1451 caller. Conversely, on output, '\n' is translated to the system
1452 default line separator, os.linesep. If newline is any other of its
1453 legal values, that newline becomes the newline when the file is read
1454 and it is returned untranslated. On output, '\n' is converted to the
1455 newline.
1456
1457 If line_buffering is True, a call to flush is implied when a call to
1458 write contains a newline character.
1459 """
1460
1461 _CHUNK_SIZE = 2048
1462
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Set up a text layer over the binary stream buffer.

    encoding: codec name; when None it is taken from
        os.device_encoding() for the buffer's file descriptor, then from
        locale.getpreferredencoding(), falling back to "ascii" when the
        locale module cannot be imported.
    errors: codec error handler name; None means "strict".
    newline: None, '', '\\n', '\\r' or '\\r\\n'.
    line_buffering: stored as self._line_buffering.
    write_through: accepted in the signature; not used by this method.

    Raises TypeError for a non-str newline, and ValueError for an
    illegal newline value or a non-str encoding or errors argument.
    """
    if newline is not None and not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

    # The offending value is wrapped in a 1-tuple so that a tuple
    # argument is reported as ValueError instead of breaking the
    # %-formatting with a TypeError.
    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % (encoding,))

    if errors is None:
        errors = "strict"
    else:
        if not isinstance(errors, str):
            raise ValueError("invalid errors: %r" % (errors,))

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._b2cratio = 0.0

    if self._seekable and self.writable():
        position = self.buffer.tell()
        if position != 0:
            # Not at the start of the stream: put the encoder into
            # state 0 before anything is written.
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
1518
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001519 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1520 # where dec_flags is the second (integer) item of the decoder state
1521 # and next_input is the chunk of input bytes that comes next after the
1522 # snapshot point. We use this to reconstruct decoder states in tell().
1523
1524 # Naming convention:
1525 # - "bytes_..." for integer variables that count input bytes
1526 # - "chars_..." for integer variables that count decoded characters
1527
def __repr__(self):
    """Return "<_pyio.TextIOWrapper [name=...] [mode=...] encoding=...>".

    The name and mode parts are each left out when reading the
    attribute raises AttributeError.
    """
    parts = ["<_pyio.TextIOWrapper"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        parts.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        parts.append(" mode={0!r}".format(mode))
    parts.append(" encoding={0!r}>".format(self.encoding))
    return "".join(parts)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001543
@property
def encoding(self):
    """The encoding stored by __init__ in self._encoding."""
    value = self._encoding
    return value
1547
@property
def errors(self):
    """The error handler name stored in self._errors."""
    value = self._errors
    return value
1551
@property
def line_buffering(self):
    """The line-buffering flag stored in self._line_buffering."""
    value = self._line_buffering
    return value
1555
@property
def buffer(self):
    """The underlying binary stream, as stored in self._buffer."""
    stream = self._buffer
    return stream
1559
def seekable(self):
    """Return the seekability recorded in self._seekable."""
    answer = self._seekable
    return answer
1562
def readable(self):
    """Return whatever the underlying buffer's readable() reports."""
    underlying = self.buffer
    return underlying.readable()
1565
def writable(self):
    """Return whatever the underlying buffer's writable() reports."""
    underlying = self.buffer
    return underlying.writable()
1568
def flush(self):
    """Flush the underlying buffer and reset the _telling flag.

    _telling is set back to the stream's seekability (self._seekable).
    """
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
1572
def close(self):
    """Flush pending data and close the underlying buffer.

    Does nothing when the buffer is None or already closed.  The
    buffer is closed even when flush() raises, so a failing flush
    cannot leak it; the flush error still propagates to the caller.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577
@property
def closed(self):
    """The closed attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
1581
@property
def name(self):
    """The name attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
1585
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    underlying = self.buffer
    return underlying.fileno()
1588
def isatty(self):
    """Return whatever the underlying buffer's isatty() returns."""
    underlying = self.buffer
    return underlying.isatty()
1591
def write(self, s):
    """Encode the str s, pass the bytes to the buffer, return len(s).

    When write translation is on and the configured newline is not
    "\\n", every "\\n" in s is replaced by it before encoding.  With line
    buffering enabled, a write containing a newline or "\\r" triggers
    flush().  The tell() snapshot is dropped and any decoder is reset.

    Raises ValueError on a closed stream and TypeError if s is not a
    str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    saw_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
1613
def _get_encoder(self):
    """Create, store in self._encoder and return an incremental encoder.

    The encoder is built for self._encoding with self._errors as its
    error handler.
    """
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    self._encoder = encoder_factory(self._errors)
    return self._encoder
1618
def _get_decoder(self):
    """Create, store in self._decoder and return an incremental decoder.

    The decoder is built for self._encoding with self._errors as its
    error handler.  When self._readuniversal is set it is wrapped in an
    IncrementalNewlineDecoder that translates according to
    self._readtranslate.
    """
    decoder_factory = codecs.getincrementaldecoder(self._encoding)
    built = decoder_factory(self._errors)
    if self._readuniversal:
        built = IncrementalNewlineDecoder(built, self._readtranslate)
    self._decoder = built
    return built
1625 return decoder
1626
1627 # The following three methods implement an ADT for _decoded_chars.
1628 # Text returned from the decoder is buffered here until the client
1629 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and mark none of it as used."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
1634
def _get_decoded_chars(self, n=None):
    """Return up to n unread decoded characters and mark them as used.

    With n None, everything that is still unread is returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
1644
def _rewind_decoded_chars(self, n):
    """Mark the last n used decoded characters as unread again.

    Raises AssertionError when fewer than n characters have been used.
    """
    if n > self._decoded_chars_used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
1650
def _read_chunk(self):
    """Read and decode the next chunk of data from the buffer.

    The decoded text replaces self._decoded_chars.  The whole input
    chunk is fed to the decoder, although part of it may stay buffered
    inside the decoder, not yet converted.

    Returns True unless EOF was reached.  Raises ValueError when no
    decoder has been set up.
    """
    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), capture the decoder state before the
        # read: there was a valid snapshot point len(dec_buffer) bytes
        # ago with decoder state (b'', dec_flags).
        dec_buffer, dec_flags = self._decoder.getstate()

    # Read one chunk, through read1() when the buffer provides it.
    if self._has_read1:
        fetch = self.buffer.read1
    else:
        fetch = self.buffer.read
    input_chunk = fetch(self._CHUNK_SIZE)
    at_eof = not input_chunk
    text = self._decoder.decode(input_chunk, at_eof)
    self._set_decoded_chars(text)
    # Bytes consumed per character produced by this chunk.
    self._b2cratio = (len(input_chunk) / len(self._decoded_chars)
                      if text else 0.0)

    if self._telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not at_eof
1692
1693 def _pack_cookie(self, position, dec_flags=0,
1694 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1695 # The meaning of a tell() cookie is: seek to position, set the
1696 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1697 # into the decoder with need_eof as the EOF flag, then skip
1698 # chars_to_skip characters of the decoded result. For most simple
1699 # decoders, tell() will often just give a byte offset in the file.
1700 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1701 (chars_to_skip<<192) | bool(need_eof)<<256)
1702
1703 def _unpack_cookie(self, bigint):
1704 rest, position = divmod(bigint, 1<<64)
1705 rest, dec_flags = divmod(rest, 1<<64)
1706 rest, bytes_to_feed = divmod(rest, 1<<64)
1707 need_eof, chars_to_skip = divmod(rest, 1<<64)
1708 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1709
def tell(self):
    """Return a cookie describing the current position in the text.

    When there is no decoder or no snapshot, the cookie is simply the
    byte position reported by the underlying buffer.  Otherwise it is
    an integer built by _pack_cookie() from: a byte position at which
    the decoder had nothing buffered, the decoder flags at that point,
    the number of bytes to feed the decoder from there, an EOF flag and
    the number of decoded characters to skip afterwards.

    The decoder is used to replay input while the cookie is computed;
    its state is saved beforehand and restored before returning.

    Raises UnsupportedOperation if the underlying stream is not
    seekable, IOError if telling is currently disabled or the logical
    position cannot be reconstructed, and AssertionError if decoded
    text is pending although there is no decoder or snapshot.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        # First guess: scale the consumed character count by the
        # bytes-per-character ratio recorded in self._b2cratio.
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit; the step doubles
                # on every consecutive overshoot.
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ended without a break (skip_bytes is no longer
            # positive): fall back to starting at the snapshot itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # Undo the effect of the replay on the live decoder.
        decoder.setstate(saved_state)
1808
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at pos.

    When pos is None the value returned by self.tell() is used.  The
    return value is whatever the buffer's truncate() returns.
    """
    self.flush()
    size = self.tell() if pos is None else pos
    return self.buffer.truncate(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001814
def detach(self):
    """Flush, then disconnect and return the underlying buffer.

    Afterwards self._buffer is None.  Raises ValueError if the buffer
    has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1822
def seek(self, cookie, whence=0):
    """Change the stream position and return the resulting position.

    cookie is either 0 or a value previously returned by tell().
    whence selects the reference point: 0 (the default) treats cookie
    as an absolute position, 1 is relative to the current position and
    2 is relative to the end of the stream.  With whence 1 or 2 only a
    cookie of 0 is supported.

    For whence 2 the byte position reported by the underlying buffer is
    returned; otherwise the cookie that was sought to is returned.

    Raises ValueError if the file is closed, if whence is not 0, 1 or 2,
    or if cookie is negative; UnsupportedOperation if the underlying
    stream is not seekable or a nonzero relative seek is requested; and
    IOError if the position described by cookie cannot be restored.
    """
    def _reset_encoder(position):
        # Reset the encoder (merely useful for proper BOM handling):
        # at position 0 it goes back to its initial state, anywhere
        # else it is put into state 0.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        # Keep the encoder consistent with the new position as well, so
        # that it is treated the same way as after an absolute seek.
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, bring the encoder in line with the new position.
    _reset_encoder(cookie)
    return cookie
1894
def read(self, n=None):
    """Read and return decoded text.

    With n None or negative, everything up to the end of the stream is
    read and decoded, and the snapshot is discarded.  Otherwise chunks
    are read until n characters are available or the end of the stream
    is reached, and at most n characters are returned.

    Raises TypeError if n has no __index__ attribute.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err

    if n < 0:
        # Unbounded read: pending text first, then the decoded rest of
        # the underlying buffer.
        pending = self._get_decoded_chars()
        text = pending + decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return text

    # Bounded read: pull in chunks until n characters were gathered or
    # a chunk read reported end of file.
    text = self._get_decoded_chars(n)
    while len(text) < n:
        more = self._read_chunk()
        text += self._get_decoded_chars(n - len(text))
        if not more:
            break
    return text
1919
1920 def __next__(self):
1921 self._telling = False
1922 line = self.readline()
1923 if not line:
1924 self._snapshot = None
1925 self._telling = self._seekable
1926 raise StopIteration
1927 return line
1928
def readline(self, limit=None):
    """Read and return one line of text.

    Reading stops after the first line ending, after limit characters
    when limit is a non-negative integer, or at end of file, whichever
    comes first.  Which line endings are recognised depends on
    self._readtranslate, self._readuniversal and self._readnl.  Text
    that was decoded beyond the returned line stays available for the
    next read.

    Raises ValueError if the file is closed and TypeError if limit is
    neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Take all pending decoded text; the surplus is given back at the end.
    text = self._get_decoded_chars()

    # Make sure a decoder exists before chunks have to be read.
    if not self._decoder:
        self._get_decoder()

    scan_from = 0   # text before this index is known to hold no newline
    cut = None      # index just past the end of the line to return
    while True:
        if self._readtranslate:
            # Newlines are already translated, only "\n" can end a line.
            idx = text.find('\n', scan_from)
            if idx >= 0:
                cut = idx + 1
                break
            scan_from = len(text)

        elif self._readuniversal:
            # Universal newlines: the line ends at whichever of "\r",
            # "\r\n" and "\n" comes first.  The decoder ensures that
            # "\r\n" is never split across two chunks.
            lf = text.find("\n", scan_from)
            cr = text.find("\r", scan_from)
            if lf == -1 and cr == -1:
                # Nothing found
                scan_from = len(text)
            else:
                if cr == -1 or -1 < lf < cr:
                    # "\n" comes first (or is the only candidate)
                    cut = lf + 1
                elif lf == cr + 1:
                    # "\r\n"
                    cut = cr + 2
                else:
                    # lone "\r"
                    cut = cr + 1
                break

        else:
            # A single, explicitly configured line terminator.
            idx = text.find(self._readnl)
            if idx >= 0:
                cut = idx + len(self._readnl)
                break

        if 0 <= limit <= len(text):
            # Reached the length limit without seeing a line ending.
            cut = limit
            break

        # No line ending seen yet - get more data, skipping chunks that
        # decode to nothing.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if not self._decoded_chars:
            # End of file: return what was collected so far.
            self._set_decoded_chars('')
            self._snapshot = None
            return text
        text += self._get_decoded_chars()

    if 0 <= limit < cut:
        # Don't exceed the limit.
        cut = limit

    # Rewind _decoded_chars to just after the part being returned.
    self._rewind_decoded_chars(len(text) - cut)
    return text[:cut]
2016
@property
def newlines(self):
    """The decoder's newlines attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2020
2021
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream and optionally preload it with text.

        initial_value must be a str or None; anything else raises
        TypeError.  After a str initial value has been written, the
        position is moved back to the start.
        """
        # The text is kept UTF-8 encoded in a private BytesIO.  The
        # "surrogatepass" error handler lets lone surrogates, which are
        # legal members of a str, round-trip through that storage
        # instead of making write() fail with UnicodeEncodeError.
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str."""
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler is an implementation detail."""
        return None

    @property
    def encoding(self):
        """Always None: the storage encoding is an implementation detail."""
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")