blob: fcd548ddc388272e4e544f93ecdb3039cf7bc511 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01009import errno
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000010# Import _thread instead of threading to reduce startup cost
11try:
12 from _thread import allocate_lock as Lock
13except ImportError:
14 from _dummy_thread import allocate_lock as Lock
15
16import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000017from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
# Fallback buffer size.  open() prefers the st_blksize reported by
# os.fstat() for the opened file and only uses this value when that is
# unavailable or not greater than 1.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind for compatibility: makes the builtin BlockingIOError available as
# an attribute of this module.
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000028
29
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It
    works as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor
    will be kept open when the file is closed. This does not work when a file
    name is given and must be True in that case.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Argument validation raises TypeError for arguments of the wrong type
    and ValueError for inconsistent mode/option combinations.  If an
    error occurs after the raw file has been opened, the partially built
    stream is closed before the exception propagates.
    """
    # The single-element tuples keep %-formatting correct even when the
    # offending argument is itself a tuple.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))
    modes = set(mode)
    # Reject unknown flag characters and repeated flags.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # `result` always names the outermost layer built so far, so closing it
    # on failure releases everything that has been opened.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Do not leave the descriptor to the garbage collector.
        result.close()
        raise
219
220
class DocDescriptor:
    """Helper for builtins.open.__doc__

    A non-data descriptor: reading it returns the call signature of
    open() followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ):
        # open.__doc__ is None when docstrings have been stripped; fall
        # back to an empty string rather than failing on concatenation.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            (open.__doc__ or ""))
229
class OpenWrapper:
    """Wrapper for builtins.open

    A plain function stored as a class variable becomes a bound method
    when looked up on an instance; wrapping open() in a class avoids
    that (dbm.dumb stores it this way).  "Instantiating" the wrapper
    simply forwards the call to open() and hands back its result.

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Return the stream itself rather than an OpenWrapper instance.
        stream = open(*args, **kwargs)
        return stream
242
243
# Normally this module and `io` share one `UnsupportedOperation` class;
# a local stand-in is only defined when `io` does not provide it.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251
252
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for the method called name."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-byte seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the file is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  The
        object is marked closed even when flush() raises, so a failed
        flush is reported once and not retried by later close() calls.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise
        UnsupportedOperation.  This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable.

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable.

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable.

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed.

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line of bytes from the stream.

        If limit is specified, at most limit bytes will be read.
        Limit should be an int; None is treated like -1 (no limit).
        Raises TypeError for any other non-int limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, read up to and including the next
            # newline visible in the look-ahead data in a single call.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # Without peek() the only safe step is one byte at a time.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iteration yields lines.  Raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration when readline()
        returns an empty result."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.  Raises ValueError if closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
516
# Register with the official ABC instead of inheriting from it.
io.IOBase.register(IOBase)
518
519
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on readinto(): a subclass that supplies readinto()
    # gets read() and readall() for free, and readinto() is usually the
    # cheaper primitive anyway.

    # (The reverse default -- readinto() built on read() -- is deliberately
    # absent: a subclass implementing neither would recurse forever.)

    def read(self, n=-1):
        """Read and return up to n bytes, where n is an int.

        With n omitted, None or negative, everything up to EOF is read
        through readall().  Returns an empty bytes object on EOF, or None
        if the object is set not to block and has no data to read.
        """
        if n is None or n < 0:
            return self.readall()
        scratch = bytearray(n.__index__())
        count = self.readinto(scratch)
        if count is None:
            # Non-blocking stream with nothing available right now.
            return None
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the accumulated bytes; when nothing was accumulated, the
        result of the final read() (b'' or None) is returned unchanged.
        """
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # `data` is b'' or None here.
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
579
# Register with the official ABC instead of inheriting from it.
io.RawIOBase.register(RawIOBase)
# FileIO is taken from the _io module and registered as a virtual subclass
# of the RawIOBase defined above.
from _io import FileIO
RawIOBase.register(FileIO)
583
584
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes, where n is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call,
        where n is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        b may be any writable object supporting the buffer protocol
        (bytearray, array.array of any typecode, memoryview, ...); its
        capacity is measured in bytes, not items.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment, and TypeError if b is not a writable buffer.
        """
        if not isinstance(b, memoryview):
            b = memoryview(b)
        # View the target as flat unsigned bytes so that len() counts
        # bytes and slice assignment accepts the bytes returned by read().
        b = b.cast('B')
        data = self.read(len(b))
        n = len(data)
        b[:n] = data
        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
670
# Register with the official ABC instead of inheriting from it.
io.BufferedIOBase.register(BufferedIOBase)
672
673
674class _BufferedIOMixin(BufferedIOBase):
675
676 """A mixin implementation of BufferedIOBase with an underlying raw stream.
677
678 This passes most requests on to the underlying raw stream. It
679 does *not* provide implementations of read(), readinto() or
680 write().
681 """
682
683 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000684 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000685
686 ### Positioning ###
687
688 def seek(self, pos, whence=0):
689 new_position = self.raw.seek(pos, whence)
690 if new_position < 0:
691 raise IOError("seek() returned an invalid position")
692 return new_position
693
694 def tell(self):
695 pos = self.raw.tell()
696 if pos < 0:
697 raise IOError("tell() returned an invalid position")
698 return pos
699
700 def truncate(self, pos=None):
701 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
702 # and a flush may be necessary to synch both views of the current
703 # file state.
704 self.flush()
705
706 if pos is None:
707 pos = self.tell()
708 # XXX: Should seek() be used, instead of passing the position
709 # XXX directly to truncate?
710 return self.raw.truncate(pos)
711
712 ### Flush and close ###
713
714 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000715 if self.closed:
716 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717 self.raw.flush()
718
719 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000720 if self.raw is not None and not self.closed:
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +0100721 try:
722 # may raise BlockingIOError or BrokenPipeError etc
723 self.flush()
724 finally:
725 self.raw.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000727 def detach(self):
728 if self.raw is None:
729 raise ValueError("raw stream already detached")
730 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000731 raw = self._raw
732 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000733 return raw
734
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735 ### Inquiries ###
736
737 def seekable(self):
738 return self.raw.seekable()
739
740 def readable(self):
741 return self.raw.readable()
742
743 def writable(self):
744 return self.raw.writable()
745
746 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000747 def raw(self):
748 return self._raw
749
750 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751 def closed(self):
752 return self.raw.closed
753
754 @property
755 def name(self):
756 return self.raw.name
757
758 @property
759 def mode(self):
760 return self.raw.mode
761
def __getstate__(self):
    """Refuse pickling: always raises TypeError naming the class."""
    type_name = self.__class__.__name__
    raise TypeError("can not serialize a '{0}' object".format(type_name))
765
def __repr__(self):
    """Return '<_pyio.ClassName name=...>', omitting name if unavailable."""
    cls_label = self.__class__.__name__
    try:
        stream_name = self.name
    except AttributeError:
        # The raw stream has no usable name; fall back to the short form.
        return "<_pyio.{0}>".format(cls_label)
    return "<_pyio.{0} name={1!r}>".format(cls_label, stream_name)
774
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000775 ### Lower-level APIs ###
776
def fileno(self):
    """Return whatever the raw stream's fileno() returns."""
    return self.raw.fileno()

def isatty(self):
    """Return whatever the raw stream's isatty() returns."""
    return self.raw.isatty()
782
783
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0 even when initial data is supplied.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        n of None or a negative value reads everything up to the end of
        the buffer.  Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the number of bytes.

        Raises ValueError on a closed stream and TypeError for str input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position and 2 relative to the end; the
        relative modes clamp the result at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            # Only checks that pos supports the integer protocol.
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos onwards and return pos.

        pos defaults to the current position.  The position itself is
        not moved.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
896
897
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock."""
        # Value returned when no data at all could be produced: b"" for
        # EOF, or None when the raw stream returned None.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except InterruptedError:
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except InterruptedError:
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            # Retry the single raw read if it is interrupted.
            while True:
                try:
                    current = self.raw.read(to_read)
                except InterruptedError:
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate for it.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1048
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writable sequential RawIO object.

    Wraps the given writable raw stream.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE; max_buffer_size is deprecated and only triggers
    a DeprecationWarning.
    """

    # stacklevel handed to warnings.warn() for the deprecation warning.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer and return the bytes accepted.

        The buffer is flushed whenever it grows beyond buffer_size.  If
        that flush raises BlockingIOError while the buffer is still over
        the limit, the buffer is cut back to buffer_size and a new
        BlockingIOError carrying the number of accepted bytes is raised.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            start_len = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - start_len
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        excess = len(self._write_buf) - self.buffer_size
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror,
                                              accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            size = self.raw.tell() if pos is None else pos
            return self.raw.truncate(size)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except InterruptedError:
                # Interrupted writes are simply retried.
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream could not accept anything right now.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count > len(self._write_buf) or count < 0:
                raise IOError("write() returned incorrect number of bytes")
            del self._write_buf[:count]

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1142
1143
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  max_buffer_size is
        deprecated; passing it only emits a DeprecationWarning.
        Raises IOError if reader is not readable or writer is not
        writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; n of None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Delegate to the reader's readable()."""
        return self.reader.readable()

    def writable(self):
        """Delegate to the writer's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even when closing the writer raises (for
        instance because its final flush failed), so it is not leaked.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer side."""
        return self.writer.closed
1214
1215
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    Combines BufferedReader and BufferedWriter over one seekable raw
    stream, given as the first argument.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel handed to warnings.warn() by BufferedWriter.__init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, seek the raw stream.

        Returns the new position; raises IOError if the raw stream
        reports a negative one.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, from whichever buffer is in use."""
        if not self._write_buf:
            return BufferedReader.tell(self)
        return BufferedWriter.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the logical position from tell())."""
        size = self.tell() if pos is None else pos
        # BufferedWriter.truncate flushes pending writes before truncating.
        return BufferedWriter.truncate(self, size)

    def read(self, n=None):
        """Flush pending writes, then read; n of None reads everything."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then delegate to BufferedWriter.write."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1287
1288
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line oriented interface to stream I/O.
    readinto() is absent because Python strings are immutable.  There
    is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from stream, where n is an int.

        Reads from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n reads to EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in
        an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Register with the "official" ABC defined in io.py.
io.TextIOBase.register(TextIOBase)
1355
1356
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder (or None, in which case input is
    passed through as already-decoded text), translating \r\n and \r
    into \n when translate is true.  It also records which kinds of
    newlines were seen.  With translate=False it still makes sure a
    \r\n sequence is returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, handling newlines, and return the text."""
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r flag."""
        if self.decoder is None:
            pending_input, flags = b"", 0
        else:
            pending_input, flags = self.decoder.getstate()
        # Make room for our own flag below the wrapped decoder's flags.
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending_input, flags

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending_input, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending_input, flags >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bit mask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1440
1441
1442class TextIOWrapper(TextIOBase):
1443
1444 r"""Character and line based layer over a BufferedIOBase object, buffer.
1445
1446 encoding gives the name of the encoding that the stream will be
1447 decoded or encoded with. It defaults to locale.getpreferredencoding.
1448
1449 errors determines the strictness of encoding and decoding (see the
1450 codecs.register) and defaults to "strict".
1451
1452 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1453 handling of line endings. If it is None, universal newlines is
1454 enabled. With this enabled, on input, the lines endings '\n', '\r',
1455 or '\r\n' are translated to '\n' before being returned to the
1456 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001457 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001458 legal values, that newline becomes the newline when the file is read
1459 and it is returned untranslated. On output, '\n' is converted to the
1460 newline.
1461
1462 If line_buffering is True, a call to flush is implied when a call to
1463 write contains a newline character.
1464 """
1465
1466 _CHUNK_SIZE = 2048
1467
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Set up the text layer over buffer.

    newline must be None, '', '\\n', '\\r' or '\\r\\n' (TypeError for a
    non-str value, ValueError for any other string).  When encoding is
    None it is taken from os.device_encoding() of the buffer's file
    descriptor, then from locale.getpreferredencoding(), falling back
    to "ascii" if locale cannot be imported.  errors defaults to
    "strict".  write_through is accepted but not used by this method.
    """
    if not (newline is None or isinstance(newline, str)):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()
    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors

    # Newline handling on the read and write sides.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    # Codecs are created lazily.
    self._encoder = None
    self._decoder = None

    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state

    can_seek = self.buffer.seekable()
    self._seekable = can_seek
    self._telling = can_seek
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._b2cratio = 0.0

    if self._seekable and self.writable():
        if self.buffer.tell() != 0:
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
1523
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001524 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1525 # where dec_flags is the second (integer) item of the decoder state
1526 # and next_input is the chunk of input bytes that comes next after the
1527 # snapshot point. We use this to reconstruct decoder states in tell().
1528
1529 # Naming convention:
1530 # - "bytes_..." for integer variables that count input bytes
1531 # - "chars_..." for integer variables that count decoded characters
1532
def __repr__(self):
    """Return '<_pyio.TextIOWrapper [name=...] [mode=...] encoding=...>'.

    The name and mode parts are left out when the corresponding
    attribute lookup raises AttributeError.
    """
    pieces = ["<_pyio.TextIOWrapper"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        pieces.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        pieces.append(" mode={0!r}".format(mode))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001548
@property
def encoding(self):
    """The encoding name stored by the constructor."""
    return self._encoding

@property
def errors(self):
    """The error-handling setting stored by the constructor."""
    return self._errors

@property
def line_buffering(self):
    """The line_buffering flag stored by the constructor."""
    return self._line_buffering

@property
def buffer(self):
    """The underlying buffer object."""
    return self._buffer
1564
def seekable(self):
    """Return the seekable flag cached in self._seekable."""
    return self._seekable

def readable(self):
    """Return whatever the buffer's readable() returns."""
    return self.buffer.readable()

def writable(self):
    """Return whatever the buffer's writable() returns."""
    return self.buffer.writable()
1573
def flush(self):
    """Flush the buffer and reset _telling to the seekable flag."""
    self.buffer.flush()
    # _telling is restored to _seekable after every flush.
    self._telling = self._seekable
1577
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer is None or already closed.  The
    buffer is closed even if the flush raises, so a failing flush does
    not leave the underlying stream open; the flush error still
    propagates to the caller.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001582
@property
def closed(self):
    """The buffer's closed attribute."""
    return self.buffer.closed

@property
def name(self):
    """The buffer's name attribute."""
    return self.buffer.name

def fileno(self):
    """Return whatever the buffer's fileno() returns."""
    return self.buffer.fileno()

def isatty(self):
    """Return whatever the buffer's isatty() returns."""
    return self.buffer.isatty()
1596
def write(self, s):
    """Write the str s and return its length before newline translation.

    Raises ValueError on a closed stream and TypeError when s is not a
    str.  With line buffering enabled, a newline or '\\r' in the data
    triggers a flush.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only scan for "\n" when translation or line buffering needs it.
    has_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if has_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    # The encoder is created on first use.
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    self.buffer.write(encoder.encode(s))
    if self._line_buffering and (has_newline or "\r" in s):
        self.flush()
    # The snapshot is dropped and the decoder (if any) reset after
    # every write.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
1618
def _get_encoder(self):
    """Create, cache in self._encoder, and return an incremental encoder."""
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = encoder_factory(self._errors)
    self._encoder = encoder
    return self._encoder
1623
def _get_decoder(self):
    """Create, cache in self._decoder, and return an incremental decoder.

    When self._readuniversal is set, the codec's decoder is wrapped in
    an IncrementalNewlineDecoder configured with self._readtranslate.
    """
    decoder_factory = codecs.getincrementaldecoder(self._encoding)
    result = decoder_factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
1630 return decoder
1631
1632 # The following three methods implement an ADT for _decoded_chars.
1633 # Text returned from the decoder is buffered here until the client
1634 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Set the _decoded_chars buffer."""
    self._decoded_chars, self._decoded_chars_used = chars, 0

def _get_decoded_chars(self, n=None):
    """Advance into the _decoded_chars buffer."""
    start = self._decoded_chars_used
    # n of None takes everything that is left in the buffer.
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken

def _rewind_decoded_chars(self, n):
    """Rewind the _decoded_chars buffer."""
    if n > self._decoded_chars_used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
1654 self._decoded_chars_used -= n
1655
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the BufferedReader.
    """

    # Returns True unless EOF was reached.  The decoded text replaces
    # the previous contents of self._decoded_chars.  The whole input
    # chunk goes to the decoder, although part of it may stay buffered
    # inside the decoder, yet to be converted.

    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.

        dec_buffer, dec_flags = self._decoder.getstate()
        # Given this, we know there was a valid snapshot point
        # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

    # Read a chunk, decode it, and put the result in self._decoded_chars.
    if self._has_read1:
        raw_bytes = self.buffer.read1(self._CHUNK_SIZE)
    else:
        raw_bytes = self.buffer.read(self._CHUNK_SIZE)
    at_eof = not raw_bytes
    text = self._decoder.decode(raw_bytes, at_eof)
    self._set_decoded_chars(text)
    # Bytes consumed per decoded character for this chunk.
    if text:
        self._b2cratio = len(raw_bytes) / len(self._decoded_chars)
    else:
        self._b2cratio = 0.0

    if self._telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + raw_bytes.
        self._snapshot = (dec_flags, dec_buffer + raw_bytes)

    return not at_eof
1697
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the components of a tell() cookie into a single integer.

    A cookie means: seek to position, set the decoder flags to dec_flags,
    read bytes_to_feed bytes, feed them into the decoder with need_eof as
    the EOF flag, then skip chars_to_skip characters of the decoded
    result.  Each field sits 64 bits above the previous one, and need_eof
    is reduced to a single bit at offset 256.  When every field but
    position is zero, the cookie is simply the byte offset.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
1707
def _unpack_cookie(self, bigint):
    """Split a tell() cookie back into its components.

    Returns the tuple (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip).  The first, second, third and fifth items are
    successive 64-bit fields taken from the low end of *bigint*; need_eof
    is whatever remains above them.
    """
    remainder = bigint
    fields = []
    for _ in range(4):
        remainder, field = divmod(remainder, 1 << 64)
        fields.append(field)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, remainder, chars_to_skip
1714
def tell(self):
    """Return an opaque cookie describing the current text position.

    The cookie is built by _pack_cookie().  When nothing has been decoded
    since the last snapshot it is just the byte offset of the underlying
    buffer; otherwise the decoder is replayed over the snapshot bytes to
    find the nearest point where it holds no buffered input.

    Raises UnsupportedOperation if the stream is not seekable, and IOError
    if telling is disabled or the position cannot be reconstructed.  The
    decoder state is restored before returning.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    byte_pos = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return byte_pos

    # Step back to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    byte_pos -= len(next_input)

    # Number of decoded characters consumed since the snapshot.
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # Still sitting exactly on the snapshot point.
        return self._pack_cookie(byte_pos, dec_flags)

    # From the snapshot position, walk the decoder forward until it has
    # produced enough characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point close to the current
        # position.  decoder.decode() has a large fixed overhead, so the
        # number of calls should be O(1) for common decoders and sane
        # input; for fixed-width codecs (all 8-bit codecs, UTF-16,
        # UTF-32) it is exactly one.
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to the tentative start point.
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n > chars_to_skip:
                # Overshot: back off, doubling the step every time.
                skip_bytes -= skip_back
                skip_back *= 2
                continue
            pending, flags_here = decoder.getstate()
            if not pending:
                # Before the target with nothing buffered => usable.
                dec_flags = flags_here
                chars_to_skip -= n
                break
            # Back up over the buffered bytes and reset the heuristic.
            skip_bytes -= len(pending)
            skip_back = 1
        else:
            # No usable point found; restart from the snapshot itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Record the initial start point.
        start_pos = byte_pos + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # The start point is exactly where we are.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time, remembering the nearest
        # "safe start point" before the current location: a point where
        # the decoder has nothing buffered, so seek() can start there and
        # advance to this location.
        bytes_fed = 0
        need_eof = 0
        # Characters decoded since `start_pos`.
        chars_decoded = 0
        for index in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[index:index + 1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Empty decoder buffer: this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # Ran out of input; signal EOF to squeeze out the rest.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The cookie describes the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        decoder.setstate(saved_state)
1813
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at *pos*.

    When *pos* is None the current tell() value is used.  Returns whatever
    the buffer's own truncate() returns.
    """
    self.flush()
    size = self.tell() if pos is None else pos
    return self.buffer.truncate(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001819
def detach(self):
    """Flush, then separate the underlying buffer from this wrapper.

    Returns the object held in ``self._buffer`` and leaves ``self._buffer``
    set to None.  Raises ValueError if ``self.buffer`` is already None.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
1827
def seek(self, cookie, whence=0):
    """Move the text position and return the new position.

    cookie -- for whence 0, a value previously returned by tell() (or 0);
              for whence 1 or 2 it must be 0.
    whence -- 0: absolute, 1: relative to the current position,
              2: relative to the end of the stream.

    Returns the cookie that was sought to for whence 0 and 1, or the byte
    offset returned by the underlying buffer for whence 2.

    Raises ValueError if the stream is closed, whence is invalid or the
    cookie is negative; UnsupportedOperation if the stream is not seekable
    or a nonzero relative seek is requested; IOError if the logical
    position described by the cookie cannot be restored.
    """
    def _reset_encoder(position):
        # Reset the encoder (merely useful for proper BOM handling): at
        # offset 0 it starts afresh, anywhere else it is put in state 0.
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        # The encoder must follow the new position as well, otherwise a
        # later write would use a stale encoder state (e.g. emit a BOM in
        # the middle of a non-empty stream).
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, bring the encoder in line with the new position.
    _reset_encoder(cookie)
    return cookie
1899
def read(self, n=None):
    """Read and return at most *n* characters as a single str.

    With *n* of None or negative, everything up to EOF is read, decoded
    and returned, and the pending-text buffer and snapshot are cleared.
    Otherwise chunks are read until *n* characters are available or EOF
    is reached.

    Raises TypeError if *n* has no ``__index__`` attribute.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    try:
        n.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err

    if n < 0:
        # Read everything: pending text first, then the rest of the
        # underlying buffer decoded in one final call.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return pending + remainder

    # Keep pulling chunks until n characters are collected or EOF is hit.
    result = self._get_decoded_chars(n)
    while len(result) < n:
        more_to_come = self._read_chunk()
        result += self._get_decoded_chars(n - len(result))
        if not more_to_come:
            break
    return result
1924
def __next__(self):
    """Return the next line, or raise StopIteration at end of input.

    Telling is switched off while iterating.  Once readline() returns an
    empty result, the snapshot is dropped and telling is set back to the
    value of ``self._seekable``.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1933
1934 def readline(self, limit=None):
1935 if self.closed:
1936 raise ValueError("read from closed file")
1937 if limit is None:
1938 limit = -1
Benjamin Petersonb01138a2009-04-24 22:59:52 +00001939 elif not isinstance(limit, int):
1940 raise TypeError("limit must be an integer")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001941
1942 # Grab all the decoded text (we will rewind any extra bits later).
1943 line = self._get_decoded_chars()
1944
1945 start = 0
1946 # Make the decoder if it doesn't already exist.
1947 if not self._decoder:
1948 self._get_decoder()
1949
1950 pos = endpos = None
1951 while True:
1952 if self._readtranslate:
1953 # Newlines are already translated, only search for \n
1954 pos = line.find('\n', start)
1955 if pos >= 0:
1956 endpos = pos + 1
1957 break
1958 else:
1959 start = len(line)
1960
1961 elif self._readuniversal:
1962 # Universal newline search. Find any of \r, \r\n, \n
1963 # The decoder ensures that \r\n are not split in two pieces
1964
1965 # In C we'd look for these in parallel of course.
1966 nlpos = line.find("\n", start)
1967 crpos = line.find("\r", start)
1968 if crpos == -1:
1969 if nlpos == -1:
1970 # Nothing found
1971 start = len(line)
1972 else:
1973 # Found \n
1974 endpos = nlpos + 1
1975 break
1976 elif nlpos == -1:
1977 # Found lone \r
1978 endpos = crpos + 1
1979 break
1980 elif nlpos < crpos:
1981 # Found \n
1982 endpos = nlpos + 1
1983 break
1984 elif nlpos == crpos + 1:
1985 # Found \r\n
1986 endpos = crpos + 2
1987 break
1988 else:
1989 # Found \r
1990 endpos = crpos + 1
1991 break
1992 else:
1993 # non-universal
1994 pos = line.find(self._readnl)
1995 if pos >= 0:
1996 endpos = pos + len(self._readnl)
1997 break
1998
1999 if limit >= 0 and len(line) >= limit:
2000 endpos = limit # reached length limit
2001 break
2002
2003 # No line ending seen yet - get more data'
2004 while self._read_chunk():
2005 if self._decoded_chars:
2006 break
2007 if self._decoded_chars:
2008 line += self._get_decoded_chars()
2009 else:
2010 # end of file
2011 self._set_decoded_chars('')
2012 self._snapshot = None
2013 return line
2014
2015 if limit >= 0 and endpos > limit:
2016 endpos = limit # don't exceed limit
2017
2018 # Rewind _decoded_chars to just after the line ending we found.
2019 self._rewind_decoded_chars(len(line) - endpos)
2020 return line[:endpos]
2021
@property
def newlines(self):
    """The ``newlines`` attribute of the decoder, or None without a decoder."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
2025
2026
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the initial contents of the object.
    The newline argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream, optionally pre-filled with *initial_value*.

        initial_value -- a str, or None for an empty stream.
        newline       -- passed on to TextIOWrapper's constructor.

        Raises TypeError if initial_value is neither a str nor None.
        """
        # The text is stored as UTF-8 bytes in a BytesIO.  "surrogatepass"
        # lets that storage round-trip every code point a str can hold,
        # lone surrogates included; with "strict" such text could be
        # neither written nor read back.
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            # Normalise str subclasses to an exact str before writing.
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Flush and return the entire contents of the buffer as a str."""
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the internal error handler is not exposed."""
        return None

    @property
    def encoding(self):
        """Always None: the internal encoding is not exposed."""
        return None

    def detach(self):
        """Report the operation as unsupported via ``self._unsupported``."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")