Blame - Lib/io.py - platform/external/python/cpython2

blob: 213b0fc760793b12d392d2ad321649a2dbcd8318 [file] [log] [blame]

Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1	"""
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	2	The io module provides the Python interfaces to stream handling. The
				3	builtin open function is defined in this module.
				4
				5	At the top of the I/O hierarchy is the abstract base class IOBase. It
				6	defines the basic interface to a stream. Note, however, that there is no
				7	separation between reading and writing to streams; implementations are
				8	allowed to throw an IOError if they do not support a given operation.
				9
				10	Extending IOBase is RawIOBase which deals simply with the reading and
				11	writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
				12	an interface to OS files.
				13
				14	BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
				15	subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
				16	streams that are readable, writable, and both respectively.
				17	BufferedRandom provides a buffered interface to random access
				18	streams. BytesIO is a simple stream of in-memory bytes.
				19
				20	Another IOBase subclass, TextIOBase, deals with the encoding and decoding
				21	of streams into text. TextIOWrapper, which extends it, is a buffered text
				22	interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
				23	is an in-memory stream for text.
				24
				25	Argument names are not part of the specification, and only the arguments
				26	of open() are intended to be used as keyword arguments.
				27
				28	data:
				29
				30	DEFAULT_BUFFER_SIZE
				31
				32	An int containing the default buffer size used by the module's buffered
				33	I/O classes. open() uses the file's blksize (as obtained by os.stat) if
				34	possible.
				35	"""
				36	# New I/O library conforming to PEP 3116.
				37
				38	# This is a prototype; hopefully eventually some of this will be
				39	# reimplemented in C.
				40
				41	# XXX edge cases when switching between reading/writing
				42	# XXX need to support 1 meaning line-buffered
				43	# XXX whenever an argument is None, use the default value
				44	# XXX read/write ops should check readable/writable
				45	# XXX buffered readinto should work with arbitrary buffer objects
				46	# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
				47	# XXX check writable, readable and seekable in appropriate places
Christian Heimes	3784c6b	2008-03-26 23:13:59 +0000	[diff] [blame]	48	from __future__ import print_function
				49	from __future__ import unicode_literals
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	50
# Authors of this module, as a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Names exported by "from io import *".
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
				59
				60	import os
				61	import abc
				62	import sys
				63	import codecs
				64	import _fileio
				65	import warnings
				66
# Default size of the buffer used by the buffered I/O classes.
# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes

# py3k has only new style classes; a module-level __metaclass__ of type
# makes every class defined in this module new-style under Python 2 as well.
__metaclass__ = type
				72
				73	class BlockingIOError(IOError):
				74
				75	"""Exception raised when I/O would block on a non-blocking I/O stream."""
				76
				77	def __init__(self, errno, strerror, characters_written=0):
				78	IOError.__init__(self, errno, strerror)
				79	self.characters_written = characters_written
				80
				81
				82	def open(file, mode="r", buffering=None, encoding=None, errors=None,
				83	newline=None, closefd=True):
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	84	r"""
				85	Open file and return a stream. If the file cannot be opened, an
				86	IOError is raised.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	87
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	88	file is either a string giving the name (and the path if the file
				89	isn't in the current working directory) of the file to be opened or an
				90	integer file descriptor of the file to be wrapped. (If a file
				91	descriptor is given, it is closed when the returned I/O object is
				92	closed, unless closefd is set to False.)
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	93
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	94	mode is an optional string that specifies the mode in which the file
				95	is opened. It defaults to 'r' which means open for reading in text
				96	mode. Other common values are 'w' for writing (truncating the file if
				97	it already exists), and 'a' for appending (which on some Unix systems,
				98	means that all writes append to the end of the file regardless of the
				99	current seek position). In text mode, if encoding is not specified the
				100	encoding used is platform dependent. (For reading and writing raw
				101	bytes use binary mode and leave encoding unspecified.) The available
				102	modes are:
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	103
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	104	========= ===============================================================
				105	Character Meaning
				106	--------- ---------------------------------------------------------------
				107	'r' open for reading (default)
				108	'w' open for writing, truncating the file first
				109	'a' open for writing, appending to the end of the file if it exists
				110	'b' binary mode
				111	't' text mode (default)
				112	'+' open a disk file for updating (reading and writing)
				113	'U' universal newline mode (for backwards compatibility; unneeded
				114	for new code)
				115	========= ===============================================================
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	116
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	117	The default mode is 'rt' (open for reading text). For binary random
				118	access, the mode 'w+b' opens and truncates the file to 0 bytes, while
				119	'r+b' opens the file without truncation.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	120
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	121	Python distinguishes between files opened in binary and text modes,
				122	even when the underlying operating system doesn't. Files opened in
				123	binary mode (appending 'b' to the mode argument) return contents as
				124	bytes objects without any decoding. In text mode (the default, or when
				125	't' is appended to the mode argument), the contents of the file are
				126	returned as strings, the bytes having been first decoded using a
				127	platform-dependent encoding or using the specified encoding if given.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	128
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	129	buffering is an optional integer used to set the buffering policy. By
				130	default full buffering is on. Pass 0 to switch buffering off (only
				131	allowed in binary mode), 1 to set line buffering, and an integer > 1
				132	for full buffering.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	133
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	134	encoding is the name of the encoding used to decode or encode the
				135	file. This should only be used in text mode. The default encoding is
				136	platform dependent, but any encoding supported by Python can be
				137	passed. See the codecs module for the list of supported encodings.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	138
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	139	errors is an optional string that specifies how encoding errors are to
				140	be handled---this argument should not be used in binary mode. Pass
				141	'strict' to raise a ValueError exception if there is an encoding error
				142	(the default of None has the same effect), or pass 'ignore' to ignore
				143	errors. (Note that ignoring encoding errors can lead to data loss.)
				144	See the documentation for codecs.register for a list of the permitted
				145	encoding error strings.
				146
				147	newline controls how universal newlines works (it only applies to text
				148	mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
				149	follows:
				150
				151	* On input, if newline is None, universal newlines mode is
				152	enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
				153	these are translated into '\n' before being returned to the
				154	caller. If it is '', universal newline mode is enabled, but line
				155	endings are returned to the caller untranslated. If it has any of
				156	the other legal values, input lines are only terminated by the given
				157	string, and the line ending is returned to the caller untranslated.
				158
				159	* On output, if newline is None, any '\n' characters written are
				160	translated to the system default line separator, os.linesep. If
				161	newline is '', no translation takes place. If newline is any of the
				162	other legal values, any '\n' characters written are translated to
				163	the given string.
				164
				165	If closefd is False, the underlying file descriptor will be kept open
				166	when the file is closed. This does not work when a file name is given
				167	and must be True in that case.
				168
				169	open() returns a file object whose type depends on the mode, and
				170	through which the standard file operations such as reading and writing
				171	are performed. When open() is used to open a file in a text mode ('w',
				172	'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
				173	a file in a binary mode, the returned class varies: in read binary
				174	mode, it returns a BufferedReader; in write binary and append binary
				175	modes, it returns a BufferedWriter, and in read/write mode, it returns
				176	a BufferedRandom.
				177
				178	It is also possible to use a string or bytearray as a file for both
				179	reading and writing. For strings StringIO can be used like a file
				180	opened in a text mode, and for bytes a BytesIO can be used like a file
				181	opened in a binary mode.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	182	"""
Christian Heimes	3784c6b	2008-03-26 23:13:59 +0000	[diff] [blame]	183	if not isinstance(file, (basestring, int)):
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	184	raise TypeError("invalid file: %r" % file)
Christian Heimes	3784c6b	2008-03-26 23:13:59 +0000	[diff] [blame]	185	if not isinstance(mode, basestring):
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	186	raise TypeError("invalid mode: %r" % mode)
				187	if buffering is not None and not isinstance(buffering, int):
				188	raise TypeError("invalid buffering: %r" % buffering)
Christian Heimes	3784c6b	2008-03-26 23:13:59 +0000	[diff] [blame]	189	if encoding is not None and not isinstance(encoding, basestring):
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	190	raise TypeError("invalid encoding: %r" % encoding)
Christian Heimes	3784c6b	2008-03-26 23:13:59 +0000	[diff] [blame]	191	if errors is not None and not isinstance(errors, basestring):
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	192	raise TypeError("invalid errors: %r" % errors)
				193	modes = set(mode)
				194	if modes - set("arwb+tU") or len(mode) > len(modes):
				195	raise ValueError("invalid mode: %r" % mode)
				196	reading = "r" in modes
				197	writing = "w" in modes
				198	appending = "a" in modes
				199	updating = "+" in modes
				200	text = "t" in modes
				201	binary = "b" in modes
				202	if "U" in modes:
				203	if writing or appending:
				204	raise ValueError("can't use U and writing mode at once")
				205	reading = True
				206	if text and binary:
				207	raise ValueError("can't have text and binary mode at once")
				208	if reading + writing + appending > 1:
				209	raise ValueError("can't have read/write/append mode at once")
				210	if not (reading or writing or appending):
				211	raise ValueError("must have exactly one of read/write/append mode")
				212	if binary and encoding is not None:
				213	raise ValueError("binary mode doesn't take an encoding argument")
				214	if binary and errors is not None:
				215	raise ValueError("binary mode doesn't take an errors argument")
				216	if binary and newline is not None:
				217	raise ValueError("binary mode doesn't take a newline argument")
				218	raw = FileIO(file,
				219	(reading and "r" or "") +
				220	(writing and "w" or "") +
				221	(appending and "a" or "") +
				222	(updating and "+" or ""),
				223	closefd)
				224	if buffering is None:
				225	buffering = -1
				226	line_buffering = False
				227	if buffering == 1 or buffering < 0 and raw.isatty():
				228	buffering = -1
				229	line_buffering = True
				230	if buffering < 0:
				231	buffering = DEFAULT_BUFFER_SIZE
				232	try:
				233	bs = os.fstat(raw.fileno()).st_blksize
				234	except (os.error, AttributeError):
				235	pass
				236	else:
				237	if bs > 1:
				238	buffering = bs
				239	if buffering < 0:
				240	raise ValueError("invalid buffering size")
				241	if buffering == 0:
				242	if binary:
				243	raw._name = file
				244	raw._mode = mode
				245	return raw
				246	raise ValueError("can't have unbuffered text I/O")
				247	if updating:
				248	buffer = BufferedRandom(raw, buffering)
				249	elif writing or appending:
				250	buffer = BufferedWriter(raw, buffering)
				251	elif reading:
				252	buffer = BufferedReader(raw, buffering)
				253	else:
				254	raise ValueError("unknown mode: %r" % mode)
				255	if binary:
				256	buffer.name = file
				257	buffer.mode = mode
				258	return buffer
				259	text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
				260	text.name = file
				261	text.mode = mode
				262	return text
				263
				264	class _DocDescriptor:
				265	"""Helper for builtins.open.__doc__
				266	"""
				267	def __get__(self, obj, typ):
				268	return (
				269	"open(file, mode='r', buffering=None, encoding=None, "
				270	"errors=None, newline=None, closefd=True)\n\n" +
				271	open.__doc__)
				272
				273	class OpenWrapper:
				274	"""Wrapper for builtins.open
				275
				276	Trick so that open won't become a bound method when stored
				277	as a class variable (as dumbdbm does).
				278
				279	See initstdio() in Python/pythonrun.c.
				280	"""
				281	__doc__ = _DocDescriptor()
				282
				283	def __new__(cls, args, *kwargs):
				284	return open(args, *kwargs)
				285
				286
				287	class UnsupportedOperation(ValueError, IOError):
				288	pass
				289
				290
				291	class IOBase(object):
				292
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	293	"""
				294	The abstract base class for all I/O classes, acting on streams of
				295	bytes. There is no public constructor.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	296
				297	This class provides dummy implementations for many methods that
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	298	derived classes can override selectively; the default implementations
				299	represent a file that cannot be read, written or seeked.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	300
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	301	Even though IOBase does not declare read, readinto, or write because
				302	their signatures will vary, implementations and clients should
				303	consider those methods part of the interface. Also, implementations
				304	may raise a IOError when operations they do not support are called.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	305
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	306	The basic type used for binary data read from or written to a file is
				307	bytes. bytearrays are accepted too, and in some cases (such as
				308	readinto) needed. Text I/O classes work with str data.
				309
				310	Note that calling any method (even inquiries) on a closed stream is
				311	undefined. Implementations may raise IOError in this case.
				312
				313	IOBase (and its subclasses) support the iterator protocol, meaning
				314	that an IOBase object can be iterated over yielding the lines in a
				315	stream.
				316
				317	IOBase also supports the :keyword:`with` statement. In this example,
				318	fp is closed after the suite of the with statment is complete:
				319
				320	with open('spam.txt', 'r') as fp:
				321	fp.write('Spam and eggs!')
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	322	"""
				323
				324	__metaclass__ = abc.ABCMeta
				325
				326	### Internal ###
				327
				328	def _unsupported(self, name):
				329	"""Internal: raise an exception for unsupported operations."""
				330	raise UnsupportedOperation("%s.%s() not supported" %
				331	(self.__class__.__name__, name))
				332
				333	### Positioning ###
				334
				335	def seek(self, pos, whence = 0):
				336	"""seek(pos: int, whence: int = 0) -> int. Change stream position.
				337
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	338	Change the stream position to byte offset offset. offset is
				339	interpreted relative to the position indicated by whence. Values
				340	for whence are:
				341
				342	* 0 -- start of stream (the default); offset should be zero or positive
				343	* 1 -- current stream position; offset may be negative
				344	* 2 -- end of stream; offset is usually negative
				345
				346	Return the new absolute position.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	347	"""
				348	self._unsupported("seek")
				349
				350	def tell(self):
				351	"""tell() -> int. Return current stream position."""
				352	return self.seek(0, 1)
				353
				354	def truncate(self, pos = None):
				355	"""truncate(size: int = None) -> int. Truncate file to size bytes.
				356
				357	Size defaults to the current IO position as reported by tell().
				358	Returns the new size.
				359	"""
				360	self._unsupported("truncate")
				361
				362	### Flush and close ###
				363
				364	def flush(self):
				365	"""flush() -> None. Flushes write buffers, if applicable.
				366
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	367	This is not implemented for read-only and non-blocking streams.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	368	"""
				369	# XXX Should this return the number of bytes written???
				370
				371	__closed = False
				372
				373	def close(self):
				374	"""close() -> None. Flushes and closes the IO object.
				375
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	376	This method has no effect if the file is already closed.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	377	"""
				378	if not self.__closed:
				379	try:
				380	self.flush()
				381	except IOError:
				382	pass # If flush() fails, just give up
				383	self.__closed = True
				384
				385	def __del__(self):
				386	"""Destructor. Calls close()."""
				387	# The try/except block is in case this is called at program
				388	# exit time, when it's possible that globals have already been
				389	# deleted, and then the close() call might fail. Since
				390	# there's nothing we can do about such failures and they annoy
				391	# the end users, we suppress the traceback.
				392	try:
				393	self.close()
				394	except:
				395	pass
				396
				397	### Inquiries ###
				398
				399	def seekable(self):
				400	"""seekable() -> bool. Return whether object supports random access.
				401
				402	If False, seek(), tell() and truncate() will raise IOError.
				403	This method may need to do a test seek().
				404	"""
				405	return False
				406
				407	def _checkSeekable(self, msg=None):
				408	"""Internal: raise an IOError if file is not seekable
				409	"""
				410	if not self.seekable():
				411	raise IOError("File or stream is not seekable."
				412	if msg is None else msg)
				413
				414
				415	def readable(self):
				416	"""readable() -> bool. Return whether object was opened for reading.
				417
				418	If False, read() will raise IOError.
				419	"""
				420	return False
				421
				422	def _checkReadable(self, msg=None):
				423	"""Internal: raise an IOError if file is not readable
				424	"""
				425	if not self.readable():
				426	raise IOError("File or stream is not readable."
				427	if msg is None else msg)
				428
				429	def writable(self):
				430	"""writable() -> bool. Return whether object was opened for writing.
				431
				432	If False, write() and truncate() will raise IOError.
				433	"""
				434	return False
				435
				436	def _checkWritable(self, msg=None):
				437	"""Internal: raise an IOError if file is not writable
				438	"""
				439	if not self.writable():
				440	raise IOError("File or stream is not writable."
				441	if msg is None else msg)
				442
				443	@property
				444	def closed(self):
				445	"""closed: bool. True iff the file has been closed.
				446
				447	For backwards compatibility, this is a property, not a predicate.
				448	"""
				449	return self.__closed
				450
				451	def _checkClosed(self, msg=None):
				452	"""Internal: raise an ValueError if file is closed
				453	"""
				454	if self.closed:
				455	raise ValueError("I/O operation on closed file."
				456	if msg is None else msg)
				457
				458	### Context manager ###
				459
				460	def __enter__(self):
				461	"""Context management protocol. Returns self."""
				462	self._checkClosed()
				463	return self
				464
				465	def __exit__(self, *args):
				466	"""Context management protocol. Calls close()"""
				467	self.close()
				468
				469	### Lower-level APIs ###
				470
				471	# XXX Should these be present even if unimplemented?
				472
				473	def fileno(self):
				474	"""fileno() -> int. Returns underlying file descriptor if one exists.
				475
				476	Raises IOError if the IO object does not use a file descriptor.
				477	"""
				478	self._unsupported("fileno")
				479
				480	def isatty(self):
				481	"""isatty() -> int. Returns whether this is an 'interactive' stream.
				482
				483	Returns False if we don't know.
				484	"""
				485	self._checkClosed()
				486	return False
				487
				488	### Readline[s] and writelines ###
				489
				490	def readline(self, limit = -1):
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	491	r"""readline(limit: int = -1) -> bytes Read and return a line from the
				492	stream.
				493
				494	If limit is specified, at most limit bytes will be read.
				495
				496	The line terminator is always b'\n' for binary files; for text
				497	files, the newlines argument to open can be used to select the line
				498	terminator(s) recognized.
				499	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	500	if hasattr(self, "peek"):
				501	def nreadahead():
				502	readahead = self.peek(1)
				503	if not readahead:
				504	return 1
				505	n = (readahead.find(b"\n") + 1) or len(readahead)
				506	if limit >= 0:
				507	n = min(n, limit)
				508	return n
				509	else:
				510	def nreadahead():
				511	return 1
				512	if limit is None:
				513	limit = -1
				514	res = bytearray()
				515	while limit < 0 or len(res) < limit:
				516	b = self.read(nreadahead())
				517	if not b:
				518	break
				519	res += b
				520	if res.endswith(b"\n"):
				521	break
				522	return bytes(res)
				523
				524	def __iter__(self):
				525	self._checkClosed()
				526	return self
				527
				528	def next(self):
				529	line = self.readline()
				530	if not line:
				531	raise StopIteration
				532	return line
				533
				534	def readlines(self, hint=None):
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	535	"""readlines(hint=None) -> list Return a list of lines from the stream.
				536
				537	hint can be specified to control the number of lines read: no more
				538	lines will be read if the total size (in bytes/characters) of all
				539	lines so far exceeds hint.
				540	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	541	if hint is None:
				542	return list(self)
				543	n = 0
				544	lines = []
				545	for line in self:
				546	lines.append(line)
				547	n += len(line)
				548	if n >= hint:
				549	break
				550	return lines
				551
				552	def writelines(self, lines):
				553	self._checkClosed()
				554	for line in lines:
				555	self.write(line)
				556
				557
				558	class RawIOBase(IOBase):
				559
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	560	"""Base class for raw binary I/O."""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	561
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	562	# The read() method is implemented by calling readinto(); derived
				563	# classes that want to support read() only need to implement
				564	# readinto() as a primitive operation. In general, readinto() can be
				565	# more efficient than read().
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	566
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	567	# (It would be tempting to also provide an implementation of
				568	# readinto() in terms of read(), in case the latter is a more suitable
				569	# primitive operation, but that would lead to nasty recursion in case
				570	# a subclass doesn't implement either.)
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	571
				572	def read(self, n = -1):
				573	"""read(n: int) -> bytes. Read and return up to n bytes.
				574
				575	Returns an empty bytes array on EOF, or None if the object is
				576	set not to block and has no data to read.
				577	"""
				578	if n is None:
				579	n = -1
				580	if n < 0:
				581	return self.readall()
				582	b = bytearray(n.__index__())
				583	n = self.readinto(b)
				584	del b[n:]
				585	return bytes(b)
				586
				587	def readall(self):
				588	"""readall() -> bytes. Read until EOF, using multiple read() call."""
				589	res = bytearray()
				590	while True:
				591	data = self.read(DEFAULT_BUFFER_SIZE)
				592	if not data:
				593	break
				594	res += data
				595	return bytes(res)
				596
				597	def readinto(self, b):
				598	"""readinto(b: bytes) -> int. Read up to len(b) bytes into b.
				599
				600	Returns number of bytes read (0 for EOF), or None if the object
				601	is set not to block as has no data to read.
				602	"""
				603	self._unsupported("readinto")
				604
				605	def write(self, b):
				606	"""write(b: bytes) -> int. Write the given buffer to the IO stream.
				607
				608	Returns the number of bytes written, which may be less than len(b).
				609	"""
				610	self._unsupported("write")
				611
				612
				613	class FileIO(_fileio._FileIO, RawIOBase):
				614
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	615	"""Raw I/O implementation for OS files."""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	616
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	617	# This multiply inherits from _FileIO and RawIOBase to make
				618	# isinstance(io.FileIO(), io.RawIOBase) return True without requiring
				619	# that _fileio._FileIO inherits from io.RawIOBase (which would be hard
				620	# to do since _fileio.c is written in C).
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	621
				622	def close(self):
				623	_fileio._FileIO.close(self)
				624	RawIOBase.close(self)
				625
				626	@property
				627	def name(self):
				628	return self._name
				629
				630	@property
				631	def mode(self):
				632	return self._mode
				633
				634
				635	class BufferedIOBase(IOBase):
				636
				637	"""Base class for buffered IO objects.
				638
				639	The main difference with RawIOBase is that the read() method
				640	supports omitting the size argument, and does not have a default
				641	implementation that defers to readinto().
				642
				643	In addition, read(), readinto() and write() may raise
				644	BlockingIOError if the underlying raw stream is in non-blocking
				645	mode and not ready; unlike their raw counterparts, they will never
				646	return None.
				647
				648	A typical implementation should not inherit from a RawIOBase
				649	implementation, but wrap one.
				650	"""
				651
				652	def read(self, n = None):
				653	"""read(n: int = None) -> bytes. Read and return up to n bytes.
				654
				655	If the argument is omitted, None, or negative, reads and
				656	returns all data until EOF.
				657
				658	If the argument is positive, and the underlying raw stream is
				659	not 'interactive', multiple raw reads may be issued to satisfy
				660	the byte count (unless EOF is reached first). But for
				661	interactive raw streams (XXX and for pipes?), at most one raw
				662	read will be issued, and a short result does not imply that
				663	EOF is imminent.
				664
				665	Returns an empty bytes array on EOF.
				666
				667	Raises BlockingIOError if the underlying raw stream has no
				668	data at the moment.
				669	"""
				670	self._unsupported("read")
				671
				672	def readinto(self, b):
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	673	"""readinto(b: bytearray) -> int. Read up to len(b) bytes into b.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	674
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	675	Like read(), this may issue multiple reads to the underlying raw
				676	stream, unless the latter is 'interactive'.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	677
				678	Returns the number of bytes read (0 for EOF).
				679
				680	Raises BlockingIOError if the underlying raw stream has no
				681	data at the moment.
				682	"""
				683	# XXX This ought to work with anything that supports the buffer API
				684	data = self.read(len(b))
				685	n = len(data)
				686	try:
				687	b[:n] = data
				688	except TypeError as err:
				689	import array
				690	if not isinstance(b, array.array):
				691	raise err
				692	b[:n] = array.array('b', data)
				693	return n
				694
				695	def write(self, b):
				696	"""write(b: bytes) -> int. Write the given buffer to the IO stream.
				697
				698	Returns the number of bytes written, which is never less than
				699	len(b).
				700
				701	Raises BlockingIOError if the buffer is full and the
				702	underlying raw stream cannot accept more data at the moment.
				703	"""
				704	self._unsupported("write")
				705
				706
				707	class _BufferedIOMixin(BufferedIOBase):
				708
				709	"""A mixin implementation of BufferedIOBase with an underlying raw stream.
				710
				711	This passes most requests on to the underlying raw stream. It
				712	does not provide implementations of read(), readinto() or
				713	write().
				714	"""
				715
				716	def __init__(self, raw):
				717	self.raw = raw
				718
				719	### Positioning ###
				720
				721	def seek(self, pos, whence=0):
				722	return self.raw.seek(pos, whence)
				723
				724	def tell(self):
				725	return self.raw.tell()
				726
				727	def truncate(self, pos=None):
				728	# Flush the stream. We're mixing buffered I/O with lower-level I/O,
				729	# and a flush may be necessary to synch both views of the current
				730	# file state.
				731	self.flush()
				732
				733	if pos is None:
				734	pos = self.tell()
				735	return self.raw.truncate(pos)
				736
				737	### Flush and close ###
				738
				739	def flush(self):
				740	self.raw.flush()
				741
				742	def close(self):
				743	if not self.closed:
				744	try:
				745	self.flush()
				746	except IOError:
				747	pass # If flush() fails, just give up
				748	self.raw.close()
				749
				750	### Inquiries ###
				751
				752	def seekable(self):
				753	return self.raw.seekable()
				754
				755	def readable(self):
				756	return self.raw.readable()
				757
				758	def writable(self):
				759	return self.raw.writable()
				760
				761	@property
				762	def closed(self):
				763	return self.raw.closed
				764
				765	### Lower-level APIs ###
				766
				767	def fileno(self):
				768	return self.raw.fileno()
				769
				770	def isatty(self):
				771	return self.raw.isatty()
				772
				773
				774	class BytesIO(BufferedIOBase):
				775
				776	"""Buffered I/O implementation using an in-memory bytes buffer."""
				777
				778	# XXX More docs
				779
				780	def __init__(self, initial_bytes=None):
				781	buf = bytearray()
				782	if initial_bytes is not None:
				783	buf += initial_bytes
				784	self._buffer = buf
				785	self._pos = 0
				786
				787	def getvalue(self):
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	788	"""getvalue() -> bytes Return the bytes value (contents) of the buffer
				789	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	790	return bytes(self._buffer)
				791
				792	def read(self, n=None):
				793	if n is None:
				794	n = -1
				795	if n < 0:
				796	n = len(self._buffer)
				797	newpos = min(len(self._buffer), self._pos + n)
				798	b = self._buffer[self._pos : newpos]
				799	self._pos = newpos
				800	return bytes(b)
				801
				802	def read1(self, n):
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	803	"""In BytesIO, this is the same as read.
				804	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	805	return self.read(n)
				806
				807	def write(self, b):
				808	if self.closed:
				809	raise ValueError("write to closed file")
				810	if isinstance(b, unicode):
				811	raise TypeError("can't write unicode to binary stream")
				812	n = len(b)
				813	newpos = self._pos + n
				814	if newpos > len(self._buffer):
				815	# Inserts null bytes between the current end of the file
				816	# and the new write position.
				817	padding = b'\x00' * (newpos - len(self._buffer) - n)
				818	self._buffer[self._pos:newpos - n] = padding
				819	self._buffer[self._pos:newpos] = b
				820	self._pos = newpos
				821	return n
				822
				823	def seek(self, pos, whence=0):
				824	try:
				825	pos = pos.__index__()
				826	except AttributeError as err:
				827	raise TypeError("an integer is required") # from err
				828	if whence == 0:
				829	self._pos = max(0, pos)
				830	elif whence == 1:
				831	self._pos = max(0, self._pos + pos)
				832	elif whence == 2:
				833	self._pos = max(0, len(self._buffer) + pos)
				834	else:
				835	raise IOError("invalid whence value")
				836	return self._pos
				837
				838	def tell(self):
				839	return self._pos
				840
				841	def truncate(self, pos=None):
				842	if pos is None:
				843	pos = self._pos
				844	del self._buffer[pos:]
				845	return pos
				846
				847	def readable(self):
				848	return True
				849
				850	def writable(self):
				851	return True
				852
				853	def seekable(self):
				854	return True
				855
				856
				857	class BufferedReader(_BufferedIOMixin):
				858
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	859	"""BufferedReader(raw[, buffer_size])
				860
				861	A buffer for a readable, sequential BaseRawIO object.
				862
				863	The constructor creates a BufferedReader for the given readable raw
				864	stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
				865	is used.
				866	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	867
				868	def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
				869	"""Create a new buffered reader using the given readable raw IO object.
				870	"""
				871	raw._checkReadable()
				872	_BufferedIOMixin.__init__(self, raw)
				873	self._read_buf = b""
				874	self.buffer_size = buffer_size
				875
				876	def read(self, n=None):
				877	"""Read n bytes.
				878
				879	Returns exactly n bytes of data unless the underlying raw IO
				880	stream reaches EOF or if the call would block in non-blocking
				881	mode. If n is negative, read until EOF or until read() would
				882	block.
				883	"""
				884	if n is None:
				885	n = -1
				886	nodata_val = b""
				887	while n < 0 or len(self._read_buf) < n:
				888	to_read = max(self.buffer_size,
				889	n if n is not None else 2*len(self._read_buf))
				890	current = self.raw.read(to_read)
				891	if current in (b"", None):
				892	nodata_val = current
				893	break
				894	self._read_buf += current
				895	if self._read_buf:
				896	if n < 0:
				897	n = len(self._read_buf)
				898	out = self._read_buf[:n]
				899	self._read_buf = self._read_buf[n:]
				900	else:
				901	out = nodata_val
				902	return out
				903
				904	def peek(self, n=0):
				905	"""Returns buffered bytes without advancing the position.
				906
				907	The argument indicates a desired minimal number of bytes; we
				908	do at most one raw read to satisfy it. We never return more
				909	than self.buffer_size.
				910	"""
				911	want = min(n, self.buffer_size)
				912	have = len(self._read_buf)
				913	if have < want:
				914	to_read = self.buffer_size - have
				915	current = self.raw.read(to_read)
				916	if current:
				917	self._read_buf += current
				918	return self._read_buf
				919
				920	def read1(self, n):
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	921	"""Reads up to n bytes, with at most one read() system call."""
				922	# Returns up to n bytes. If at least one byte is buffered, we
				923	# only return buffered bytes. Otherwise, we do one raw read.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	924	if n <= 0:
				925	return b""
				926	self.peek(1)
				927	return self.read(min(n, len(self._read_buf)))
				928
				929	def tell(self):
				930	return self.raw.tell() - len(self._read_buf)
				931
				932	def seek(self, pos, whence=0):
				933	if whence == 1:
				934	pos -= len(self._read_buf)
				935	pos = self.raw.seek(pos, whence)
				936	self._read_buf = b""
				937	return pos
				938
				939
				940	class BufferedWriter(_BufferedIOMixin):
				941
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	942	"""BufferedWriter(raw[, buffer_size[, max_buffer_size]])
				943
				944	A buffer for a writeable sequential RawIO object.
				945
				946	The constructor creates a BufferedWriter for the given writeable raw
				947	stream. If the buffer_size is not given, it defaults to
				948	DEAFULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
				949	twice the buffer size.
				950	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	951
				952	def __init__(self, raw,
				953	buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
				954	raw._checkWritable()
				955	_BufferedIOMixin.__init__(self, raw)
				956	self.buffer_size = buffer_size
				957	self.max_buffer_size = (2*buffer_size
				958	if max_buffer_size is None
				959	else max_buffer_size)
				960	self._write_buf = bytearray()
				961
				962	def write(self, b):
				963	if self.closed:
				964	raise ValueError("write to closed file")
				965	if isinstance(b, unicode):
				966	raise TypeError("can't write unicode to binary stream")
				967	# XXX we can implement some more tricks to try and avoid partial writes
				968	if len(self._write_buf) > self.buffer_size:
				969	# We're full, so let's pre-flush the buffer
				970	try:
				971	self.flush()
				972	except BlockingIOError as e:
				973	# We can't accept anything else.
				974	# XXX Why not just let the exception pass through?
				975	raise BlockingIOError(e.errno, e.strerror, 0)
				976	before = len(self._write_buf)
				977	self._write_buf.extend(b)
				978	written = len(self._write_buf) - before
				979	if len(self._write_buf) > self.buffer_size:
				980	try:
				981	self.flush()
				982	except BlockingIOError as e:
				983	if (len(self._write_buf) > self.max_buffer_size):
				984	# We've hit max_buffer_size. We have to accept a partial
				985	# write and cut back our buffer.
				986	overage = len(self._write_buf) - self.max_buffer_size
				987	self._write_buf = self._write_buf[:self.max_buffer_size]
				988	raise BlockingIOError(e.errno, e.strerror, overage)
				989	return written
				990
				991	def flush(self):
				992	if self.closed:
				993	raise ValueError("flush of closed file")
				994	written = 0
				995	try:
				996	while self._write_buf:
				997	n = self.raw.write(self._write_buf)
				998	del self._write_buf[:n]
				999	written += n
				1000	except BlockingIOError as e:
				1001	n = e.characters_written
				1002	del self._write_buf[:n]
				1003	written += n
				1004	raise BlockingIOError(e.errno, e.strerror, written)
				1005
				1006	def tell(self):
				1007	return self.raw.tell() + len(self._write_buf)
				1008
				1009	def seek(self, pos, whence=0):
				1010	self.flush()
				1011	return self.raw.seek(pos, whence)
				1012
				1013
				1014	class BufferedRWPair(BufferedIOBase):
				1015
				1016	"""A buffered reader and writer object together.
				1017
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1018	A buffered reader object and buffered writer object put together to
				1019	form a sequential IO object that can read and write. This is typically
				1020	used with a socket or two-way pipe.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1021
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1022	reader and writer are RawIOBase objects that are readable and
				1023	writeable respectively. If the buffer_size is omitted it defaults to
				1024	DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
				1025	defaults to twice the buffer size.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1026	"""
				1027
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1028	# XXX The usefulness of this (compared to having two separate IO
				1029	# objects) is questionable.
				1030
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1031	def __init__(self, reader, writer,
				1032	buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
				1033	"""Constructor.
				1034
				1035	The arguments are two RawIO instances.
				1036	"""
				1037	reader._checkReadable()
				1038	writer._checkWritable()
				1039	self.reader = BufferedReader(reader, buffer_size)
				1040	self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)
				1041
				1042	def read(self, n=None):
				1043	if n is None:
				1044	n = -1
				1045	return self.reader.read(n)
				1046
				1047	def readinto(self, b):
				1048	return self.reader.readinto(b)
				1049
				1050	def write(self, b):
				1051	return self.writer.write(b)
				1052
				1053	def peek(self, n=0):
				1054	return self.reader.peek(n)
				1055
				1056	def read1(self, n):
				1057	return self.reader.read1(n)
				1058
				1059	def readable(self):
				1060	return self.reader.readable()
				1061
				1062	def writable(self):
				1063	return self.writer.writable()
				1064
				1065	def flush(self):
				1066	return self.writer.flush()
				1067
				1068	def close(self):
				1069	self.writer.close()
				1070	self.reader.close()
				1071
				1072	def isatty(self):
				1073	return self.reader.isatty() or self.writer.isatty()
				1074
				1075	@property
				1076	def closed(self):
				1077	return self.writer.closed()
				1078
				1079
				1080	class BufferedRandom(BufferedWriter, BufferedReader):
				1081
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1082	"""BufferedRandom(raw[, buffer_size[, max_buffer_size]])
				1083
				1084	A buffered interface to random access streams.
				1085
				1086	The constructor creates a reader and writer for a seekable stream,
				1087	raw, given in the first argument. If the buffer_size is omitted it
				1088	defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
				1089	writer) defaults to twice the buffer size.
				1090	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1091
				1092	def __init__(self, raw,
				1093	buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
				1094	raw._checkSeekable()
				1095	BufferedReader.__init__(self, raw, buffer_size)
				1096	BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
				1097
				1098	def seek(self, pos, whence=0):
				1099	self.flush()
				1100	# First do the raw seek, then empty the read buffer, so that
				1101	# if the raw seek fails, we don't lose buffered data forever.
				1102	pos = self.raw.seek(pos, whence)
				1103	self._read_buf = b""
				1104	return pos
				1105
				1106	def tell(self):
				1107	if (self._write_buf):
				1108	return self.raw.tell() + len(self._write_buf)
				1109	else:
				1110	return self.raw.tell() - len(self._read_buf)
				1111
				1112	def read(self, n=None):
				1113	if n is None:
				1114	n = -1
				1115	self.flush()
				1116	return BufferedReader.read(self, n)
				1117
				1118	def readinto(self, b):
				1119	self.flush()
				1120	return BufferedReader.readinto(self, b)
				1121
				1122	def peek(self, n=0):
				1123	self.flush()
				1124	return BufferedReader.peek(self, n)
				1125
				1126	def read1(self, n):
				1127	self.flush()
				1128	return BufferedReader.read1(self, n)
				1129
				1130	def write(self, b):
				1131	if self._read_buf:
				1132	self.raw.seek(-len(self._read_buf), 1) # Undo readahead
				1133	self._read_buf = b""
				1134	return BufferedWriter.write(self, b)
				1135
				1136
				1137	class TextIOBase(IOBase):
				1138
				1139	"""Base class for text I/O.
				1140
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1141	This class provides a character and line based interface to stream
				1142	I/O. There is no readinto method because Python's character strings
				1143	are immutable. There is no public constructor.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1144	"""
				1145
				1146	def read(self, n = -1):
				1147	"""read(n: int = -1) -> unicode. Read at most n characters from stream.
				1148
				1149	Read from underlying buffer until we have n characters or we hit EOF.
				1150	If n is negative or omitted, read until EOF.
				1151	"""
				1152	self._unsupported("read")
				1153
				1154	def write(self, s):
				1155	"""write(s: unicode) -> int. Write string s to stream."""
				1156	self._unsupported("write")
				1157
				1158	def truncate(self, pos = None):
				1159	"""truncate(pos: int = None) -> int. Truncate size to pos."""
				1160	self.flush()
				1161	if pos is None:
				1162	pos = self.tell()
				1163	self.seek(pos)
				1164	return self.buffer.truncate()
				1165
				1166	def readline(self):
				1167	"""readline() -> unicode. Read until newline or EOF.
				1168
				1169	Returns an empty string if EOF is hit immediately.
				1170	"""
				1171	self._unsupported("readline")
				1172
				1173	@property
				1174	def encoding(self):
				1175	"""Subclasses should override."""
				1176	return None
				1177
				1178	@property
				1179	def newlines(self):
				1180	"""newlines -> None \| unicode \| tuple of unicode. Line endings translated
				1181	so far.
				1182
				1183	Only line endings translated during reading are considered.
				1184
				1185	Subclasses should override.
				1186	"""
				1187	return None
				1188
				1189
				1190	class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
				1191	"""Codec used when reading a file in universal newlines mode.
				1192	It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
				1193	It also records the types of newlines encountered.
				1194	When used with translate=False, it ensures that the newline sequence is
				1195	returned in one piece.
				1196	"""
				1197	def __init__(self, decoder, translate, errors='strict'):
				1198	codecs.IncrementalDecoder.__init__(self, errors=errors)
				1199	self.buffer = b''
				1200	self.translate = translate
				1201	self.decoder = decoder
				1202	self.seennl = 0
				1203
				1204	def decode(self, input, final=False):
				1205	# decode input (with the eventual \r from a previous pass)
				1206	if self.buffer:
				1207	input = self.buffer + input
				1208
				1209	output = self.decoder.decode(input, final=final)
				1210
				1211	# retain last \r even when not translating data:
				1212	# then readline() is sure to get \r\n in one pass
				1213	if output.endswith("\r") and not final:
				1214	output = output[:-1]
				1215	self.buffer = b'\r'
				1216	else:
				1217	self.buffer = b''
				1218
				1219	# Record which newlines are read
				1220	crlf = output.count('\r\n')
				1221	cr = output.count('\r') - crlf
				1222	lf = output.count('\n') - crlf
				1223	self.seennl \|= (lf and self._LF) \| (cr and self._CR) \
				1224	\| (crlf and self._CRLF)
				1225
				1226	if self.translate:
				1227	if crlf:
				1228	output = output.replace("\r\n", "\n")
				1229	if cr:
				1230	output = output.replace("\r", "\n")
				1231
				1232	return output
				1233
				1234	def getstate(self):
				1235	buf, flag = self.decoder.getstate()
				1236	return buf + self.buffer, flag
				1237
				1238	def setstate(self, state):
				1239	buf, flag = state
				1240	if buf.endswith(b'\r'):
				1241	self.buffer = b'\r'
				1242	buf = buf[:-1]
				1243	else:
				1244	self.buffer = b''
				1245	self.decoder.setstate((buf, flag))
				1246
				1247	def reset(self):
				1248	self.seennl = 0
				1249	self.buffer = b''
				1250	self.decoder.reset()
				1251
				1252	_LF = 1
				1253	_CR = 2
				1254	_CRLF = 4
				1255
				1256	@property
				1257	def newlines(self):
				1258	return (None,
				1259	"\n",
				1260	"\r",
				1261	("\r", "\n"),
				1262	"\r\n",
				1263	("\n", "\r\n"),
				1264	("\r", "\r\n"),
				1265	("\r", "\n", "\r\n")
				1266	)[self.seennl]
				1267
				1268
				1269	class TextIOWrapper(TextIOBase):
				1270
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1271	r"""TextIOWrapper(buffer[, encoding[, errors[, newline[, line_buffering]]]])
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1272
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1273	Character and line based layer over a BufferedIOBase object, buffer.
				1274
				1275	encoding gives the name of the encoding that the stream will be
				1276	decoded or encoded with. It defaults to locale.getpreferredencoding.
				1277
				1278	errors determines the strictness of encoding and decoding (see the
				1279	codecs.register) and defaults to "strict".
				1280
				1281	newline can be None, '', '\n', '\r', or '\r\n'. It controls the
				1282	handling of line endings. If it is None, universal newlines is
				1283	enabled. With this enabled, on input, the line endings '\n', '\r',
				1284	or '\r\n' are translated to '\n' before being returned to the
				1285	caller. Conversely, on output, '\n' is translated to the system
				1286	default line separator, os.linesep. If newline is any other of its
				1287	legal values, that newline becomes the newline when the file is read
				1288	and it is returned untranslated. On output, '\n' is converted to the
				1289	newline.
				1290
				1291	If line_buffering is True, a call to flush is implied when a call to
				1292	write contains a newline character.
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1293	"""
				1294
				1295	_CHUNK_SIZE = 128
				1296
    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Validate the arguments and set up the wrapper around buffer.

        Raises ValueError for an illegal newline value or a non-string
        encoding or errors argument.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))

        if encoding is None:
            # First choice: the encoding of the underlying device, if the
            # platform and the buffer can provide one.
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        elif not isinstance(errors, basestring):
            raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._encoding = encoding
        self._errors = errors
        self._line_buffering = line_buffering

        # Newline handling on input ...
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        # ... and on output.
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep

        # Codec objects are created lazily.
        self._encoder = None
        self._decoder = None

        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        # self._snapshot is either None, or a tuple (dec_flags, next_input)
        # where dec_flags is the second (integer) item of the decoder state
        # and next_input is the chunk of input bytes that comes next after the
        # snapshot point.  We use this to reconstruct decoder states in tell().

        # Naming convention:
        #   - "bytes_..." for integer variables that count input bytes
        #   - "chars_..." for integer variables that count decoded characters
				1348
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1349	@property
				1350	def encoding(self):
				1351	return self._encoding
				1352
				1353	@property
				1354	def errors(self):
				1355	return self._errors
				1356
				1357	@property
				1358	def line_buffering(self):
				1359	return self._line_buffering
				1360
				1361	def seekable(self):
				1362	return self._seekable
				1363
				1364	def flush(self):
				1365	self.buffer.flush()
				1366	self._telling = self._seekable
				1367
				1368	def close(self):
				1369	try:
				1370	self.flush()
				1371	except:
				1372	pass # If flush() fails, just give up
				1373	self.buffer.close()
				1374
				1375	@property
				1376	def closed(self):
				1377	return self.buffer.closed
				1378
				1379	def fileno(self):
				1380	return self.buffer.fileno()
				1381
				1382	def isatty(self):
				1383	return self.buffer.isatty()
				1384
    def write(self, s):
        """Encode the unicode string s and write it to the buffer.

        Returns len(s) as measured before newline translation.  Raises
        ValueError on a closed file and TypeError for non-unicode input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        translating = self._writetranslate
        line_buffered = self._line_buffering
        haslf = (translating or line_buffered) and "\n" in s
        if haslf and translating and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder
        if not encoder:
            encoder = self._get_encoder()
        # XXX What if we were just reading?
        self.buffer.write(encoder.encode(s))
        if line_buffered and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side snapshot and decoder state.
        self._snapshot = None
        decoder = self._decoder
        if decoder:
            decoder.reset()
        return length
				1405
				1406	def _get_encoder(self):
				1407	make_encoder = codecs.getincrementalencoder(self._encoding)
				1408	self._encoder = make_encoder(self._errors)
				1409	return self._encoder
				1410
				1411	def _get_decoder(self):
				1412	make_decoder = codecs.getincrementaldecoder(self._encoding)
				1413	decoder = make_decoder(self._errors)
				1414	if self._readuniversal:
				1415	decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
				1416	self._decoder = decoder
				1417	return decoder
				1418
				1419	# The following three methods implement an ADT for _decoded_chars.
				1420	# Text returned from the decoder is buffered here until the client
				1421	# requests it by calling our read() or readline() method.
				1422	def _set_decoded_chars(self, chars):
				1423	"""Set the _decoded_chars buffer."""
				1424	self._decoded_chars = chars
				1425	self._decoded_chars_used = 0
				1426
				1427	def _get_decoded_chars(self, n=None):
				1428	"""Advance into the _decoded_chars buffer."""
				1429	offset = self._decoded_chars_used
				1430	if n is None:
				1431	chars = self._decoded_chars[offset:]
				1432	else:
				1433	chars = self._decoded_chars[offset:offset + n]
				1434	self._decoded_chars_used += len(chars)
				1435	return chars
				1436
				1437	def _rewind_decoded_chars(self, n):
				1438	"""Rewind the _decoded_chars buffer."""
				1439	if self._decoded_chars_used < n:
				1440	raise AssertionError("rewind decoded_chars out of bounds")
				1441	self._decoded_chars_used -= n
				1442
				1443	def _read_chunk(self):
				1444	"""
				1445	Read and decode the next chunk of data from the BufferedReader.
				1446
				1447	The return value is True unless EOF was reached. The decoded string
				1448	is placed in self._decoded_chars (replacing its previous value).
				1449	The entire input chunk is sent to the decoder, though some of it
				1450	may remain buffered in the decoder, yet to be converted.
				1451	"""
				1452
				1453	if self._decoder is None:
				1454	raise ValueError("no decoder")
				1455
				1456	if self._telling:
				1457	# To prepare for tell(), we need to snapshot a point in the
				1458	# file where the decoder's input buffer is empty.
				1459
				1460	dec_buffer, dec_flags = self._decoder.getstate()
				1461	# Given this, we know there was a valid snapshot point
				1462	# len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
				1463
				1464	# Read a chunk, decode it, and put the result in self._decoded_chars.
				1465	input_chunk = self.buffer.read1(self._CHUNK_SIZE)
				1466	eof = not input_chunk
				1467	self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
				1468
				1469	if self._telling:
				1470	# At the snapshot point, len(dec_buffer) bytes before the read,
				1471	# the next input to be decoded is dec_buffer + input_chunk.
				1472	self._snapshot = (dec_flags, dec_buffer + input_chunk)
				1473
				1474	return not eof
				1475
				1476	def _pack_cookie(self, position, dec_flags=0,
				1477	bytes_to_feed=0, need_eof=0, chars_to_skip=0):
				1478	# The meaning of a tell() cookie is: seek to position, set the
				1479	# decoder flags to dec_flags, read bytes_to_feed bytes, feed them
				1480	# into the decoder with need_eof as the EOF flag, then skip
				1481	# chars_to_skip characters of the decoded result. For most simple
				1482	# decoders, tell() will often just give a byte offset in the file.
				1483	return (position \| (dec_flags<<64) \| (bytes_to_feed<<128) \|
				1484	(chars_to_skip<<192) \| bool(need_eof)<<256)
				1485
				1486	def _unpack_cookie(self, bigint):
				1487	rest, position = divmod(bigint, 1<<64)
				1488	rest, dec_flags = divmod(rest, 1<<64)
				1489	rest, bytes_to_feed = divmod(rest, 1<<64)
				1490	need_eof, chars_to_skip = divmod(rest, 1<<64)
				1491	return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
				1492
    def tell(self):
        """Return an integer cookie describing the current stream position.

        The cookie is either the plain byte position of the underlying
        buffer or a value built by _pack_cookie(); seek() accepts it.

        Raises IOError if the stream is not seekable, if telling has been
        disabled by next(), or if the position cannot be reconstructed.
        Raises AssertionError if decoded text is pending although no
        decoder or snapshot exists.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        # The decoder's real state is saved here and restored in the
        # finally clause, so the replay below leaves it unchanged.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            # Each input byte is assigned into the one-element bytearray,
            # which is then passed to the decoder.
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # Reached only when the loop ran out of input without break.
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)
				1555
    def seek(self, cookie, whence=0):
        """Reposition the stream and return the resulting position.

        With whence == 0, cookie is a value previously returned by tell()
        and is returned unchanged.  With whence == 1 or 2 only a zero
        cookie is accepted: whence == 1 re-seeks to the current tell()
        position, and whence == 2 seeks to the end of the underlying
        buffer and returns the position it reports.

        Raises IOError if the stream is not seekable, for a nonzero
        cookie with whence 1 or 2, or if the position cannot be restored.
        Raises ValueError for any other whence or a negative cookie.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            # Drop any decoded text and decoder state.
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        # No decoder is created when none exists yet and the cookie is a
        # plain byte position (no flags, nothing to skip).
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie
				1612
				1613	def read(self, n=None):
				1614	if n is None:
				1615	n = -1
				1616	decoder = self._decoder or self._get_decoder()
				1617	if n < 0:
				1618	# Read everything.
				1619	result = (self._get_decoded_chars() +
				1620	decoder.decode(self.buffer.read(), final=True))
				1621	self._set_decoded_chars('')
				1622	self._snapshot = None
				1623	return result
				1624	else:
				1625	# Keep reading chunks until we have n characters to return.
				1626	eof = False
				1627	result = self._get_decoded_chars(n)
				1628	while len(result) < n and not eof:
				1629	eof = not self._read_chunk()
				1630	result += self._get_decoded_chars(n - len(result))
				1631	return result
				1632
				1633	def next(self):
				1634	self._telling = False
				1635	line = self.readline()
				1636	if not line:
				1637	self._snapshot = None
				1638	self._telling = self._seekable
				1639	raise StopIteration
				1640	return line
				1641
				1642	def readline(self, limit=None):
				1643	if limit is None:
				1644	limit = -1
				1645
				1646	# Grab all the decoded text (we will rewind any extra bits later).
				1647	line = self._get_decoded_chars()
				1648
				1649	start = 0
				1650	decoder = self._decoder or self._get_decoder()
				1651
				1652	pos = endpos = None
				1653	while True:
				1654	if self._readtranslate:
				1655	# Newlines are already translated, only search for \n
				1656	pos = line.find('\n', start)
				1657	if pos >= 0:
				1658	endpos = pos + 1
				1659	break
				1660	else:
				1661	start = len(line)
				1662
				1663	elif self._readuniversal:
				1664	# Universal newline search. Find any of \r, \r\n, \n
				1665	# The decoder ensures that \r\n are not split in two pieces
				1666
				1667	# In C we'd look for these in parallel of course.
				1668	nlpos = line.find("\n", start)
				1669	crpos = line.find("\r", start)
				1670	if crpos == -1:
				1671	if nlpos == -1:
				1672	# Nothing found
				1673	start = len(line)
				1674	else:
				1675	# Found \n
				1676	endpos = nlpos + 1
				1677	break
				1678	elif nlpos == -1:
				1679	# Found lone \r
				1680	endpos = crpos + 1
				1681	break
				1682	elif nlpos < crpos:
				1683	# Found \n
				1684	endpos = nlpos + 1
				1685	break
				1686	elif nlpos == crpos + 1:
				1687	# Found \r\n
				1688	endpos = crpos + 2
				1689	break
				1690	else:
				1691	# Found \r
				1692	endpos = crpos + 1
				1693	break
				1694	else:
				1695	# non-universal
				1696	pos = line.find(self._readnl)
				1697	if pos >= 0:
				1698	endpos = pos + len(self._readnl)
				1699	break
				1700
				1701	if limit >= 0 and len(line) >= limit:
				1702	endpos = limit # reached length limit
				1703	break
				1704
				1705	# No line ending seen yet - get more data
				1706	more_line = ''
				1707	while self._read_chunk():
				1708	if self._decoded_chars:
				1709	break
				1710	if self._decoded_chars:
				1711	line += self._get_decoded_chars()
				1712	else:
				1713	# end of file
				1714	self._set_decoded_chars('')
				1715	self._snapshot = None
				1716	return line
				1717
				1718	if limit >= 0 and endpos > limit:
				1719	endpos = limit # don't exceed limit
				1720
				1721	# Rewind _decoded_chars to just after the line ending we found.
				1722	self._rewind_decoded_chars(len(line) - endpos)
				1723	return line[:endpos]
				1724
				1725	@property
				1726	def newlines(self):
				1727	return self._decoder.newlines if self._decoder else None
				1728
				1729	class StringIO(TextIOWrapper):
				1730
Benjamin Peterson	7bb4d2d	2008-04-13 02:01:27 +0000	[diff] [blame]	1731	"""StringIO([initial_value[, encoding, [errors, [newline]]]])
				1732
				1733	An in-memory stream for text. The initial_value argument sets the
				1734	value of object. The other arguments are like those of TextIOWrapper's
				1735	constructor.
				1736	"""
Christian Heimes	1a6387e	2008-03-26 12:49:49 +0000	[diff] [blame]	1737
				1738	def __init__(self, initial_value="", encoding="utf-8",
				1739	errors="strict", newline="\n"):
				1740	super(StringIO, self).__init__(BytesIO(),
				1741	encoding=encoding,
				1742	errors=errors,
				1743	newline=newline)
				1744	if initial_value:
				1745	if not isinstance(initial_value, unicode):
				1746	initial_value = unicode(initial_value)
				1747	self.write(initial_value)
				1748	self.seek(0)
				1749
				1750	def getvalue(self):
				1751	self.flush()
				1752	return self.buffer.getvalue().decode(self._encoding, self._errors)