Blame - Modules/_textio.c - platform/external/python/cpython2

blob: 145f8eaf9c1cc17296244d22da1b8c5c7228a695 [file] [log] [blame]

Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1	/*
				2	An implementation of Text I/O as defined by PEP 3116 - "New I/O"
				3
				4	Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
				5
				6	Written by Amaury Forgeot d'Arc and Antoine Pitrou
				7	*/
				8
				9	#define PY_SSIZE_T_CLEAN
				10	#include "Python.h"
				11	#include "structmember.h"
				12	#include "_iomodule.h"
				13
				14	/* TextIOBase */
				15
				16	PyDoc_STRVAR(TextIOBase_doc,
				17	"Base class for text I/O.\n"
				18	"\n"
				19	"This class provides a character and line based interface to stream\n"
				20	"I/O. There is no readinto method because Python's character strings\n"
				21	"are immutable. There is no public constructor.\n"
				22	);
				23
				24	static PyObject *
				25	_unsupported(const char *message)
				26	{
				27	PyErr_SetString(IO_STATE->unsupported_operation, message);
				28	return NULL;
				29	}
				30
				31	PyDoc_STRVAR(TextIOBase_read_doc,
				32	"Read at most n characters from stream.\n"
				33	"\n"
				34	"Read from underlying buffer until we have n characters or we hit EOF.\n"
				35	"If n is negative or omitted, read until EOF.\n"
				36	);
				37
				38	static PyObject *
				39	TextIOBase_read(PyObject self, PyObject args)
				40	{
				41	return _unsupported("read");
				42	}
				43
				44	PyDoc_STRVAR(TextIOBase_readline_doc,
				45	"Read until newline or EOF.\n"
				46	"\n"
				47	"Returns an empty string if EOF is hit immediately.\n"
				48	);
				49
				50	static PyObject *
				51	TextIOBase_readline(PyObject self, PyObject args)
				52	{
				53	return _unsupported("readline");
				54	}
				55
				56	PyDoc_STRVAR(TextIOBase_write_doc,
				57	"Write string to stream.\n"
				58	"Returns the number of characters written (which is always equal to\n"
				59	"the length of the string).\n"
				60	);
				61
				62	static PyObject *
				63	TextIOBase_write(PyObject self, PyObject args)
				64	{
				65	return _unsupported("write");
				66	}
				67
				68	PyDoc_STRVAR(TextIOBase_encoding_doc,
				69	"Encoding of the text stream.\n"
				70	"\n"
				71	"Subclasses should override.\n"
				72	);
				73
				74	static PyObject *
				75	TextIOBase_encoding_get(PyObject self, void context)
				76	{
				77	Py_RETURN_NONE;
				78	}
				79
				80	PyDoc_STRVAR(TextIOBase_newlines_doc,
				81	"Line endings translated so far.\n"
				82	"\n"
				83	"Only line endings translated during reading are considered.\n"
				84	"\n"
				85	"Subclasses should override.\n"
				86	);
				87
				88	static PyObject *
				89	TextIOBase_newlines_get(PyObject self, void context)
				90	{
				91	Py_RETURN_NONE;
				92	}
				93
				94
				95	static PyMethodDef TextIOBase_methods[] = {
				96	{"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
				97	{"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
				98	{"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
				99	{NULL, NULL}
				100	};
				101
				102	static PyGetSetDef TextIOBase_getset[] = {
				103	{"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
				104	{"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
				105	{0}
				106	};
				107
				108	PyTypeObject PyTextIOBase_Type = {
				109	PyVarObject_HEAD_INIT(NULL, 0)
				110	"_io._TextIOBase", /tp_name/
				111	0, /tp_basicsize/
				112	0, /tp_itemsize/
				113	0, /tp_dealloc/
				114	0, /tp_print/
				115	0, /tp_getattr/
				116	0, /tp_setattr/
				117	0, /tp_compare /
				118	0, /tp_repr/
				119	0, /tp_as_number/
				120	0, /tp_as_sequence/
				121	0, /tp_as_mapping/
				122	0, /tp_hash /
				123	0, /tp_call/
				124	0, /tp_str/
				125	0, /tp_getattro/
				126	0, /tp_setattro/
				127	0, /tp_as_buffer/
				128	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE, /tp_flags/
				129	TextIOBase_doc, /* tp_doc */
				130	0, /* tp_traverse */
				131	0, /* tp_clear */
				132	0, /* tp_richcompare */
				133	0, /* tp_weaklistoffset */
				134	0, /* tp_iter */
				135	0, /* tp_iternext */
				136	TextIOBase_methods, /* tp_methods */
				137	0, /* tp_members */
				138	TextIOBase_getset, /* tp_getset */
				139	&PyIOBase_Type, /* tp_base */
				140	0, /* tp_dict */
				141	0, /* tp_descr_get */
				142	0, /* tp_descr_set */
				143	0, /* tp_dictoffset */
				144	0, /* tp_init */
				145	0, /* tp_alloc */
				146	0, /* tp_new */
				147	};
				148
				149
				150	/* IncrementalNewlineDecoder */
				151
				152	PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
				153	"Codec used when reading a file in universal newlines mode. It wraps\n"
				154	"another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
				155	"records the types of newlines encountered. When used with\n"
				156	"translate=False, it ensures that the newline sequence is returned in\n"
				157	"one piece. When used with decoder=None, it expects unicode strings as\n"
				158	"decode input and translates newlines without first invoking an external\n"
				159	"decoder.\n"
				160	);
				161
				162	typedef struct {
				163	PyObject_HEAD
				164	PyObject *decoder;
				165	PyObject *errors;
				166	int pendingcr:1;
				167	int translate:1;
				168	unsigned int seennl:3;
				169	} PyNewLineDecoderObject;
				170
				171	static int
				172	IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
				173	PyObject args, PyObject kwds)
				174	{
				175	PyObject *decoder;
				176	int translate;
				177	PyObject *errors = NULL;
				178	char *kwlist[] = {"decoder", "translate", "errors", NULL};
				179
				180	if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi\|O:IncrementalNewlineDecoder",
				181	kwlist, &decoder, &translate, &errors))
				182	return -1;
				183
				184	self->decoder = decoder;
				185	Py_INCREF(decoder);
				186
				187	if (errors == NULL) {
				188	self->errors = PyUnicode_FromString("strict");
				189	if (self->errors == NULL)
				190	return -1;
				191	}
				192	else {
				193	Py_INCREF(errors);
				194	self->errors = errors;
				195	}
				196
				197	self->translate = translate;
				198	self->seennl = 0;
				199	self->pendingcr = 0;
				200
				201	return 0;
				202	}
				203
				204	static void
				205	IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
				206	{
				207	Py_CLEAR(self->decoder);
				208	Py_CLEAR(self->errors);
				209	Py_TYPE(self)->tp_free((PyObject *)self);
				210	}
				211
				/* Bit flags stored in PyNewLineDecoderObject.seennl: which kinds of line
				   ending have been encountered so far. */
				#define SEEN_CR   1
				#define SEEN_LF   2
				#define SEEN_CRLF 4
				#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
				216
				217	PyObject *
				218	_PyIncrementalNewlineDecoder_decode(PyObject *_self,
				219	PyObject *input, int final)
				220	{
				221	PyObject *output;
				222	Py_ssize_t output_len;
				223	PyNewLineDecoderObject self = (PyNewLineDecoderObject ) _self;
				224
				225	if (self->decoder == NULL) {
				226	PyErr_SetString(PyExc_ValueError,
				227	"IncrementalNewlineDecoder.__init__ not called");
				228	return NULL;
				229	}
				230
				231	/* decode input (with the eventual \r from a previous pass) */
				232	if (self->decoder != Py_None) {
				233	output = PyObject_CallMethodObjArgs(self->decoder,
				234	_PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
				235	}
				236	else {
				237	output = input;
				238	Py_INCREF(output);
				239	}
				240
				241	if (output == NULL)
				242	return NULL;
				243
				244	if (!PyUnicode_Check(output)) {
				245	PyErr_SetString(PyExc_TypeError,
				246	"decoder should return a string result");
				247	goto error;
				248	}
				249
				250	output_len = PyUnicode_GET_SIZE(output);
				251	if (self->pendingcr && (final \|\| output_len > 0)) {
				252	Py_UNICODE *out;
				253	PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
				254	if (modified == NULL)
				255	goto error;
				256	out = PyUnicode_AS_UNICODE(modified);
				257	out[0] = '\r';
				258	memcpy(out + 1, PyUnicode_AS_UNICODE(output),
				259	output_len * sizeof(Py_UNICODE));
				260	Py_DECREF(output);
				261	output = modified;
				262	self->pendingcr = 0;
				263	output_len++;
				264	}
				265
				266	/* retain last \r even when not translating data:
				267	* then readline() is sure to get \r\n in one pass
				268	*/
				269	if (!final) {
				270	if (output_len > 0
				271	&& PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
				272
				273	if (Py_REFCNT(output) == 1) {
				274	if (PyUnicode_Resize(&output, output_len - 1) < 0)
				275	goto error;
				276	}
				277	else {
				278	PyObject *modified = PyUnicode_FromUnicode(
				279	PyUnicode_AS_UNICODE(output),
				280	output_len - 1);
				281	if (modified == NULL)
				282	goto error;
				283	Py_DECREF(output);
				284	output = modified;
				285	}
				286	self->pendingcr = 1;
				287	}
				288	}
				289
				290	/* Record which newlines are read and do newline translation if desired,
				291	all in one pass. */
				292	{
				293	Py_UNICODE *in_str;
				294	Py_ssize_t len;
				295	int seennl = self->seennl;
				296	int only_lf = 0;
				297
				298	in_str = PyUnicode_AS_UNICODE(output);
				299	len = PyUnicode_GET_SIZE(output);
				300
				301	if (len == 0)
				302	return output;
				303
				304	/* If, up to now, newlines are consistently \n, do a quick check
				305	for the \r byte with the libc's optimized memchr.
				306	*/
				307	if (seennl == SEEN_LF \|\| seennl == 0) {
				308	int has_cr, has_lf;
				309	has_lf = (seennl == SEEN_LF) \|\|
				310	(memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
				311	has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
				312	if (has_lf && !has_cr) {
				313	only_lf = 1;
				314	seennl = SEEN_LF;
				315	}
				316	}
				317
				318	if (!self->translate) {
				319	Py_UNICODE s, end;
				320	if (seennl == SEEN_ALL)
				321	goto endscan;
				322	if (only_lf)
				323	goto endscan;
				324	s = in_str;
				325	end = in_str + len;
				326	for (;;) {
				327	Py_UNICODE c;
				328	/* Fast loop for non-control characters */
				329	while (*s > '\r')
				330	s++;
				331	c = *s++;
				332	if (c == '\n')
				333	seennl \|= SEEN_LF;
				334	else if (c == '\r') {
				335	if (*s == '\n') {
				336	seennl \|= SEEN_CRLF;
				337	s++;
				338	}
				339	else
				340	seennl \|= SEEN_CR;
				341	}
				342	if (s > end)
				343	break;
				344	if (seennl == SEEN_ALL)
				345	break;
				346	}
				347	endscan:
				348	;
				349	}
				350	else if (!only_lf) {
				351	PyObject *translated = NULL;
				352	Py_UNICODE *out_str;
				353	Py_UNICODE in, out, *end;
				354	if (Py_REFCNT(output) != 1) {
				355	/* We could try to optimize this so that we only do a copy
				356	when there is something to translate. On the other hand,
				357	most decoders should only output non-shared strings, i.e.
				358	translation is done in place. */
				359	translated = PyUnicode_FromUnicode(NULL, len);
				360	if (translated == NULL)
				361	goto error;
				362	assert(Py_REFCNT(translated) == 1);
				363	memcpy(PyUnicode_AS_UNICODE(translated),
				364	PyUnicode_AS_UNICODE(output),
				365	len * sizeof(Py_UNICODE));
				366	}
				367	else {
				368	translated = output;
				369	}
				370	out_str = PyUnicode_AS_UNICODE(translated);
				371	in = in_str;
				372	out = out_str;
				373	end = in_str + len;
				374	for (;;) {
				375	Py_UNICODE c;
				376	/* Fast loop for non-control characters */
				377	while ((c = *in++) > '\r')
				378	*out++ = c;
				379	if (c == '\n') {
				380	*out++ = c;
				381	seennl \|= SEEN_LF;
				382	continue;
				383	}
				384	if (c == '\r') {
				385	if (*in == '\n') {
				386	in++;
				387	seennl \|= SEEN_CRLF;
				388	}
				389	else
				390	seennl \|= SEEN_CR;
				391	*out++ = '\n';
				392	continue;
				393	}
				394	if (in > end)
				395	break;
				396	*out++ = c;
				397	}
				398	if (translated != output) {
				399	Py_DECREF(output);
				400	output = translated;
				401	}
				402	if (out - out_str != len) {
				403	if (PyUnicode_Resize(&output, out - out_str) < 0)
				404	goto error;
				405	}
				406	}
				407	self->seennl \|= seennl;
				408	}
				409
				410	return output;
				411
				412	error:
				413	Py_DECREF(output);
				414	return NULL;
				415	}
				416
				417	static PyObject *
				418	IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
				419	PyObject args, PyObject kwds)
				420	{
				421	char *kwlist[] = {"input", "final", NULL};
				422	PyObject *input;
				423	int final = 0;
				424
				425	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O\|i:IncrementalNewlineDecoder",
				426	kwlist, &input, &final))
				427	return NULL;
				428	return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
				429	}
				430
				431	static PyObject *
				432	IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject self, PyObject args)
				433	{
				434	PyObject *buffer;
				435	unsigned PY_LONG_LONG flag;
				436
				437	if (self->decoder != Py_None) {
				438	PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
				439	_PyIO_str_getstate, NULL);
				440	if (state == NULL)
				441	return NULL;
				442	if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
				443	Py_DECREF(state);
				444	return NULL;
				445	}
				446	Py_INCREF(buffer);
				447	Py_DECREF(state);
				448	}
				449	else {
				450	buffer = PyBytes_FromString("");
				451	flag = 0;
				452	}
				453	flag <<= 1;
				454	if (self->pendingcr)
				455	flag \|= 1;
				456	return Py_BuildValue("NK", buffer, flag);
				457	}
				458
				459	static PyObject *
				460	IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject self, PyObject state)
				461	{
				462	PyObject *buffer;
				463	unsigned PY_LONG_LONG flag;
				464
				465	if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
				466	return NULL;
				467
				468	self->pendingcr = (int) flag & 1;
				469	flag >>= 1;
				470
				471	if (self->decoder != Py_None)
				472	return PyObject_CallMethod(self->decoder,
				473	"setstate", "((OK))", buffer, flag);
				474	else
				475	Py_RETURN_NONE;
				476	}
				477
				478	static PyObject *
				479	IncrementalNewlineDecoder_reset(PyNewLineDecoderObject self, PyObject args)
				480	{
				481	self->seennl = 0;
				482	self->pendingcr = 0;
				483	if (self->decoder != Py_None)
				484	return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
				485	else
				486	Py_RETURN_NONE;
				487	}
				488
				489	static PyObject *
				490	IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject self, void context)
				491	{
				492	switch (self->seennl) {
				493	case SEEN_CR:
				494	return PyUnicode_FromString("\r");
				495	case SEEN_LF:
				496	return PyUnicode_FromString("\n");
				497	case SEEN_CRLF:
				498	return PyUnicode_FromString("\r\n");
				499	case SEEN_CR \| SEEN_LF:
				500	return Py_BuildValue("ss", "\r", "\n");
				501	case SEEN_CR \| SEEN_CRLF:
				502	return Py_BuildValue("ss", "\r", "\r\n");
				503	case SEEN_LF \| SEEN_CRLF:
				504	return Py_BuildValue("ss", "\n", "\r\n");
				505	case SEEN_CR \| SEEN_LF \| SEEN_CRLF:
				506	return Py_BuildValue("sss", "\r", "\n", "\r\n");
				507	default:
				508	Py_RETURN_NONE;
				509	}
				510
				511	}
				512
				513
				514	static PyMethodDef IncrementalNewlineDecoder_methods[] = {
				515	{"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS\|METH_KEYWORDS},
				516	{"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
				517	{"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
				518	{"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
				519	{0}
				520	};
				521
				522	static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
				523	{"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
				524	{0}
				525	};
				526
				527	PyTypeObject PyIncrementalNewlineDecoder_Type = {
				528	PyVarObject_HEAD_INIT(NULL, 0)
				529	"_io.IncrementalNewlineDecoder", /tp_name/
				530	sizeof(PyNewLineDecoderObject), /tp_basicsize/
				531	0, /tp_itemsize/
				532	(destructor)IncrementalNewlineDecoder_dealloc, /tp_dealloc/
				533	0, /tp_print/
				534	0, /tp_getattr/
				535	0, /tp_setattr/
				536	0, /tp_compare /
				537	0, /tp_repr/
				538	0, /tp_as_number/
				539	0, /tp_as_sequence/
				540	0, /tp_as_mapping/
				541	0, /tp_hash /
				542	0, /tp_call/
				543	0, /tp_str/
				544	0, /tp_getattro/
				545	0, /tp_setattro/
				546	0, /tp_as_buffer/
				547	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE, /tp_flags/
				548	IncrementalNewlineDecoder_doc, /* tp_doc */
				549	0, /* tp_traverse */
				550	0, /* tp_clear */
				551	0, /* tp_richcompare */
				552	0, /tp_weaklistoffset/
				553	0, /* tp_iter */
				554	0, /* tp_iternext */
				555	IncrementalNewlineDecoder_methods, /* tp_methods */
				556	0, /* tp_members */
				557	IncrementalNewlineDecoder_getset, /* tp_getset */
				558	0, /* tp_base */
				559	0, /* tp_dict */
				560	0, /* tp_descr_get */
				561	0, /* tp_descr_set */
				562	0, /* tp_dictoffset */
				563	(initproc)IncrementalNewlineDecoder_init, /* tp_init */
				564	0, /* tp_alloc */
				565	PyType_GenericNew, /* tp_new */
				566	};
				567
				568
				569	/* TextIOWrapper */
				570
				571	PyDoc_STRVAR(TextIOWrapper_doc,
				572	"Character and line based layer over a BufferedIOBase object, buffer.\n"
				573	"\n"
				574	"encoding gives the name of the encoding that the stream will be\n"
				575	"decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
				576	"\n"
				577	"errors determines the strictness of encoding and decoding (see the\n"
				578	"codecs.register) and defaults to \"strict\".\n"
				579	"\n"
				580	"newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
				581	"handling of line endings. If it is None, universal newlines is\n"
				582	"enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
				583	"or '\\r\\n' are translated to '\\n' before being returned to the\n"
				584	"caller. Conversely, on output, '\\n' is translated to the system\n"
				585	"default line seperator, os.linesep. If newline is any other of its\n"
				586	"legal values, that newline becomes the newline when the file is read\n"
				587	"and it is returned untranslated. On output, '\\n' is converted to the\n"
				588	"newline.\n"
				589	"\n"
				590	"If line_buffering is True, a call to flush is implied when a call to\n"
				591	"write contains a newline character."
				592	);
				593
				594	typedef PyObject *
				595	(encodefunc_t)(PyObject , PyObject *);
				596
				597	typedef struct
				598	{
				599	PyObject_HEAD
				600	int ok; /* initialized? */
				601	Py_ssize_t chunk_size;
				602	PyObject *buffer;
				603	PyObject *encoding;
				604	PyObject *encoder;
				605	PyObject *decoder;
				606	PyObject *readnl;
				607	PyObject *errors;
				608	const char writenl; / utf-8 encoded, NULL stands for \n */
				609	char line_buffering;
				610	char readuniversal;
				611	char readtranslate;
				612	char writetranslate;
				613	char seekable;
				614	char telling;
				615	/* Specialized encoding func (see below) */
				616	encodefunc_t encodefunc;
				617
				618	/* Reads and writes are internally buffered in order to speed things up.
				619	However, any read will first flush the write buffer if itsn't empty.
				620
				621	Please also note that text to be written is first encoded before being
				622	buffered. This is necessary so that encoding errors are immediately
				623	reported to the caller, but it unfortunately means that the
				624	IncrementalEncoder (whose encode() method is always written in Python)
				625	becomes a bottleneck for small writes.
				626	*/
				627	PyObject decoded_chars; / buffer for text returned from decoder */
				628	Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
				629	PyObject pending_bytes; / list of bytes objects waiting to be
				630	written, or NULL */
				631	Py_ssize_t pending_bytes_count;
				632	PyObject *snapshot;
				633	/* snapshot is either None, or a tuple (dec_flags, next_input) where
				634	* dec_flags is the second (integer) item of the decoder state and
				635	* next_input is the chunk of input bytes that comes next after the
				636	* snapshot point. We use this to reconstruct decoder states in tell().
				637	*/
				638
				639	/* Cache raw object if it's a FileIO object */
				640	PyObject *raw;
				641
				642	PyObject *weakreflist;
				643	PyObject *dict;
				644	} PyTextIOWrapperObject;
				645
				646
				647	/* A couple of specialized cases in order to bypass the slow incremental
				648	encoding methods for the most popular encodings. */
				649
				650	static PyObject *
				651	ascii_encode(PyTextIOWrapperObject self, PyObject text)
				652	{
				653	return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
				654	PyUnicode_GET_SIZE(text),
				655	PyBytes_AS_STRING(self->errors));
				656	}
				657
				658	static PyObject *
				659	utf16be_encode(PyTextIOWrapperObject self, PyObject text)
				660	{
				661	return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
				662	PyUnicode_GET_SIZE(text),
				663	PyBytes_AS_STRING(self->errors), 1);
				664	}
				665
				666	static PyObject *
				667	utf16le_encode(PyTextIOWrapperObject self, PyObject text)
				668	{
				669	return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
				670	PyUnicode_GET_SIZE(text),
				671	PyBytes_AS_STRING(self->errors), -1);
				672	}
				673
				674	static PyObject *
				675	utf16_encode(PyTextIOWrapperObject self, PyObject text)
				676	{
				677	PyObject *res;
				678	res = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
				679	PyUnicode_GET_SIZE(text),
				680	PyBytes_AS_STRING(self->errors), 0);
				681	if (res == NULL)
				682	return NULL;
				683	/* Next writes will skip the BOM and use native byte ordering */
				684	#if defined(WORDS_BIGENDIAN)
				685	self->encodefunc = (encodefunc_t) utf16be_encode;
				686	#else
				687	self->encodefunc = (encodefunc_t) utf16le_encode;
				688	#endif
				689	return res;
				690	}
				691
				692
				693	static PyObject *
				694	utf8_encode(PyTextIOWrapperObject self, PyObject text)
				695	{
				696	return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
				697	PyUnicode_GET_SIZE(text),
				698	PyBytes_AS_STRING(self->errors));
				699	}
				700
				701	static PyObject *
				702	latin1_encode(PyTextIOWrapperObject self, PyObject text)
				703	{
				704	return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
				705	PyUnicode_GET_SIZE(text),
				706	PyBytes_AS_STRING(self->errors));
				707	}
				708
				709	/* Map normalized encoding names onto the specialized encoding funcs */
				710
				711	typedef struct {
				712	const char *name;
				713	encodefunc_t encodefunc;
				714	} encodefuncentry;
				715
				716	encodefuncentry encodefuncs[] = {
				717	{"ascii", (encodefunc_t) ascii_encode},
				718	{"iso8859-1", (encodefunc_t) latin1_encode},
				719	{"utf-16-be", (encodefunc_t) utf16be_encode},
				720	{"utf-16-le", (encodefunc_t) utf16le_encode},
				721	{"utf-16", (encodefunc_t) utf16_encode},
				722	{"utf-8", (encodefunc_t) utf8_encode},
				723	{NULL, NULL}
				724	};
				725
				726
				727	static int
				728	TextIOWrapper_init(PyTextIOWrapperObject self, PyObject args, PyObject *kwds)
				729	{
				730	char *kwlist[] = {"buffer", "encoding", "errors",
				731	"newline", "line_buffering",
				732	NULL};
				733	PyObject buffer, raw;
				734	char *encoding = NULL;
				735	char *errors = NULL;
				736	char *newline = NULL;
				737	int line_buffering = 0;
				738	_PyIO_State *state = IO_STATE;
				739
				740	PyObject *res;
				741	int r;
				742
				743	self->ok = 0;
				744	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O\|zzzi:fileio",
				745	kwlist, &buffer, &encoding, &errors,
				746	&newline, &line_buffering))
				747	return -1;
				748
				749	if (newline && newline[0] != '\0'
				750	&& !(newline[0] == '\n' && newline[1] == '\0')
				751	&& !(newline[0] == '\r' && newline[1] == '\0')
				752	&& !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
				753	PyErr_Format(PyExc_ValueError,
				754	"illegal newline value: %s", newline);
				755	return -1;
				756	}
				757
				758	Py_CLEAR(self->buffer);
				759	Py_CLEAR(self->encoding);
				760	Py_CLEAR(self->encoder);
				761	Py_CLEAR(self->decoder);
				762	Py_CLEAR(self->readnl);
				763	Py_CLEAR(self->decoded_chars);
				764	Py_CLEAR(self->pending_bytes);
				765	Py_CLEAR(self->snapshot);
				766	Py_CLEAR(self->errors);
				767	Py_CLEAR(self->raw);
				768	self->decoded_chars_used = 0;
				769	self->pending_bytes_count = 0;
				770	self->encodefunc = NULL;
				771
				772	if (encoding == NULL) {
				773	/* Try os.device_encoding(fileno) */
				774	PyObject *fileno;
				775	fileno = PyObject_CallMethod(buffer, "fileno", NULL);
				776	/* Ignore only AttributeError and UnsupportedOperation */
				777	if (fileno == NULL) {
				778	if (PyErr_ExceptionMatches(PyExc_AttributeError) \|\|
				779	PyErr_ExceptionMatches(state->unsupported_operation)) {
				780	PyErr_Clear();
				781	}
				782	else {
				783	goto error;
				784	}
				785	}
				786	else {
				787	self->encoding = PyObject_CallMethod(state->os_module,
				788	"device_encoding",
				789	"N", fileno);
				790	if (self->encoding == NULL)
				791	goto error;
				792	else if (!PyUnicode_Check(self->encoding))
				793	Py_CLEAR(self->encoding);
				794	}
				795	}
				796	if (encoding == NULL && self->encoding == NULL) {
				797	if (state->locale_module == NULL) {
				798	state->locale_module = PyImport_ImportModule("locale");
				799	if (state->locale_module == NULL)
				800	goto catch_ImportError;
				801	else
				802	goto use_locale;
				803	}
				804	else {
				805	use_locale:
				806	self->encoding = PyObject_CallMethod(
				807	state->locale_module, "getpreferredencoding", NULL);
				808	if (self->encoding == NULL) {
				809	catch_ImportError:
				810	/*
				811	Importing locale can raise a ImportError because of
				812	_functools, and locale.getpreferredencoding can raise a
				813	ImportError if _locale is not available. These will happen
				814	during module building.
				815	*/
				816	if (PyErr_ExceptionMatches(PyExc_ImportError)) {
				817	PyErr_Clear();
				818	self->encoding = PyUnicode_FromString("ascii");
				819	}
				820	else
				821	goto error;
				822	}
				823	else if (!PyUnicode_Check(self->encoding))
				824	Py_CLEAR(self->encoding);
				825	}
				826	}
				827	if (self->encoding != NULL)
				828	encoding = _PyUnicode_AsString(self->encoding);
				829	else if (encoding != NULL) {
				830	self->encoding = PyUnicode_FromString(encoding);
				831	if (self->encoding == NULL)
				832	goto error;
				833	}
				834	else {
				835	PyErr_SetString(PyExc_IOError,
				836	"could not determine default encoding");
				837	}
				838
				839	if (errors == NULL)
				840	errors = "strict";
				841	self->errors = PyBytes_FromString(errors);
				842	if (self->errors == NULL)
				843	goto error;
				844
				845	self->chunk_size = 8192;
				846	self->readuniversal = (newline == NULL \|\| newline[0] == '\0');
				847	self->line_buffering = line_buffering;
				848	self->readtranslate = (newline == NULL);
				849	if (newline) {
				850	self->readnl = PyUnicode_FromString(newline);
				851	if (self->readnl == NULL)
				852	return -1;
				853	}
				854	self->writetranslate = (newline == NULL \|\| newline[0] != '\0');
				855	if (!self->readuniversal && self->readnl) {
				856	self->writenl = _PyUnicode_AsString(self->readnl);
				857	if (!strcmp(self->writenl, "\n"))
				858	self->writenl = NULL;
				859	}
				860	#ifdef MS_WINDOWS
				861	else
				862	self->writenl = "\r\n";
				863	#endif
				864
				865	/* Build the decoder object */
				866	res = PyObject_CallMethod(buffer, "readable", NULL);
				867	if (res == NULL)
				868	goto error;
				869	r = PyObject_IsTrue(res);
				870	Py_DECREF(res);
				871	if (r == -1)
				872	goto error;
				873	if (r == 1) {
				874	self->decoder = PyCodec_IncrementalDecoder(
				875	encoding, errors);
				876	if (self->decoder == NULL)
				877	goto error;
				878
				879	if (self->readuniversal) {
				880	PyObject *incrementalDecoder = PyObject_CallFunction(
				881	(PyObject *)&PyIncrementalNewlineDecoder_Type,
				882	"Oi", self->decoder, (int)self->readtranslate);
				883	if (incrementalDecoder == NULL)
				884	goto error;
				885	Py_CLEAR(self->decoder);
				886	self->decoder = incrementalDecoder;
				887	}
				888	}
				889
				890	/* Build the encoder object */
				891	res = PyObject_CallMethod(buffer, "writable", NULL);
				892	if (res == NULL)
				893	goto error;
				894	r = PyObject_IsTrue(res);
				895	Py_DECREF(res);
				896	if (r == -1)
				897	goto error;
				898	if (r == 1) {
				899	PyObject *ci;
				900	self->encoder = PyCodec_IncrementalEncoder(
				901	encoding, errors);
				902	if (self->encoder == NULL)
				903	goto error;
				904	/* Get the normalized named of the codec */
				905	ci = _PyCodec_Lookup(encoding);
				906	if (ci == NULL)
				907	goto error;
				908	res = PyObject_GetAttrString(ci, "name");
				909	Py_DECREF(ci);
				910	if (res == NULL)
				911	PyErr_Clear();
				912	else if (PyUnicode_Check(res)) {
				913	encodefuncentry *e = encodefuncs;
				914	while (e->name != NULL) {
				915	if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
				916	self->encodefunc = e->encodefunc;
				917	break;
				918	}
				919	e++;
				920	}
				921	}
				922	Py_XDECREF(res);
				923	}
				924
				925	self->buffer = buffer;
				926	Py_INCREF(buffer);
				927
				928	if (Py_TYPE(buffer) == &PyBufferedReader_Type \|\|
				929	Py_TYPE(buffer) == &PyBufferedWriter_Type \|\|
				930	Py_TYPE(buffer) == &PyBufferedRandom_Type) {
				931	raw = PyObject_GetAttrString(buffer, "raw");
				932	/* Cache the raw FileIO object to speed up 'closed' checks */
				933	if (raw == NULL)
				934	PyErr_Clear();
				935	else if (Py_TYPE(raw) == &PyFileIO_Type)
				936	self->raw = raw;
				937	else
				938	Py_DECREF(raw);
				939	}
				940
				941	res = PyObject_CallMethod(buffer, "seekable", NULL);
				942	if (res == NULL)
				943	goto error;
				944	self->seekable = self->telling = PyObject_IsTrue(res);
				945	Py_DECREF(res);
				946
				947	self->ok = 1;
				948	return 0;
				949
				950	error:
				951	return -1;
				952	}
				953
				954	static int
				955	_TextIOWrapper_clear(PyTextIOWrapperObject *self)
				956	{
				957	if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
				958	return -1;
				959	self->ok = 0;
				960	Py_CLEAR(self->buffer);
				961	Py_CLEAR(self->encoding);
				962	Py_CLEAR(self->encoder);
				963	Py_CLEAR(self->decoder);
				964	Py_CLEAR(self->readnl);
				965	Py_CLEAR(self->decoded_chars);
				966	Py_CLEAR(self->pending_bytes);
				967	Py_CLEAR(self->snapshot);
				968	Py_CLEAR(self->errors);
				969	Py_CLEAR(self->raw);
				970	return 0;
				971	}
				972
				973	static void
				974	TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
				975	{
				976	if (_TextIOWrapper_clear(self) < 0)
				977	return;
				978	_PyObject_GC_UNTRACK(self);
				979	if (self->weakreflist != NULL)
				980	PyObject_ClearWeakRefs((PyObject *)self);
				981	Py_CLEAR(self->dict);
				982	Py_TYPE(self)->tp_free((PyObject *)self);
				983	}
				984
				985	static int
				986	TextIOWrapper_traverse(PyTextIOWrapperObject self, visitproc visit, void arg)
				987	{
				988	Py_VISIT(self->buffer);
				989	Py_VISIT(self->encoding);
				990	Py_VISIT(self->encoder);
				991	Py_VISIT(self->decoder);
				992	Py_VISIT(self->readnl);
				993	Py_VISIT(self->decoded_chars);
				994	Py_VISIT(self->pending_bytes);
				995	Py_VISIT(self->snapshot);
				996	Py_VISIT(self->errors);
				997	Py_VISIT(self->raw);
				998
				999	Py_VISIT(self->dict);
				1000	return 0;
				1001	}
				1002
				1003	static int
				1004	TextIOWrapper_clear(PyTextIOWrapperObject *self)
				1005	{
				1006	if (_TextIOWrapper_clear(self) < 0)
				1007	return -1;
				1008	Py_CLEAR(self->dict);
				1009	return 0;
				1010	}
				1011
				1012	static PyObject *
				1013	TextIOWrapper_closed_get(PyTextIOWrapperObject self, void context);
				1014
				1015	/* This macro takes some shortcuts to make the common case faster. */
				1016	#define CHECK_CLOSED(self) \
				1017	do { \
				1018	int r; \
				1019	PyObject *_res; \
				1020	if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
				1021	if (self->raw != NULL) \
				1022	r = _PyFileIO_closed(self->raw); \
				1023	else { \
				1024	_res = TextIOWrapper_closed_get(self, NULL); \
				1025	if (_res == NULL) \
				1026	return NULL; \
				1027	r = PyObject_IsTrue(_res); \
				1028	Py_DECREF(_res); \
				1029	if (r < 0) \
				1030	return NULL; \
				1031	} \
				1032	if (r > 0) { \
				1033	PyErr_SetString(PyExc_ValueError, \
				1034	"I/O operation on closed file."); \
				1035	return NULL; \
				1036	} \
				1037	} \
				1038	else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
				1039	return NULL; \
				1040	} while (0)
				1041
				1042	#define CHECK_INITIALIZED(self) \
				1043	if (self->ok <= 0) { \
				1044	PyErr_SetString(PyExc_ValueError, \
				1045	"I/O operation on uninitialized object"); \
				1046	return NULL; \
				1047	}
				1048
				1049	#define CHECK_INITIALIZED_INT(self) \
				1050	if (self->ok <= 0) { \
				1051	PyErr_SetString(PyExc_ValueError, \
				1052	"I/O operation on uninitialized object"); \
				1053	return -1; \
				1054	}
				1055
				1056
				1057	Py_LOCAL_INLINE(const Py_UNICODE *)
				1058	findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
				1059	{
				1060	/* like wcschr, but doesn't stop at NULL characters */
				1061	while (size-- > 0) {
				1062	if (*s == ch)
				1063	return s;
				1064	s++;
				1065	}
				1066	return NULL;
				1067	}
				1068
				1069	/* Flush the internal write buffer. This doesn't explicitly flush the
				1070	underlying buffered object, though. */
				1071	static int
				1072	_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
				1073	{
				1074	PyObject b, ret;
				1075
				1076	if (self->pending_bytes == NULL)
				1077	return 0;
				1078	b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
				1079	if (b == NULL)
				1080	return -1;
				1081	ret = PyObject_CallMethodObjArgs(self->buffer,
				1082	_PyIO_str_write, b, NULL);
				1083	Py_DECREF(b);
				1084	if (ret == NULL)
				1085	return -1;
				1086	Py_DECREF(ret);
				1087	Py_CLEAR(self->pending_bytes);
				1088	self->pending_bytes_count = 0;
				1089	return 0;
				1090	}
				1091
				1092	static PyObject *
				1093	TextIOWrapper_write(PyTextIOWrapperObject self, PyObject args)
				1094	{
				1095	PyObject *ret;
				1096	PyObject text; / owned reference */
				1097	PyObject *b;
				1098	Py_ssize_t textlen;
				1099	int haslf = 0;
				1100	int needflush = 0;
				1101
				1102	CHECK_INITIALIZED(self);
				1103
				1104	if (!PyArg_ParseTuple(args, "U:write", &text)) {
				1105	return NULL;
				1106	}
				1107
				1108	CHECK_CLOSED(self);
				1109
				1110	Py_INCREF(text);
				1111
				1112	textlen = PyUnicode_GetSize(text);
				1113
				1114	if ((self->writetranslate && self->writenl != NULL) \|\| self->line_buffering)
				1115	if (findchar(PyUnicode_AS_UNICODE(text),
				1116	PyUnicode_GET_SIZE(text), '\n'))
				1117	haslf = 1;
				1118
				1119	if (haslf && self->writetranslate && self->writenl != NULL) {
				1120	PyObject *newtext = PyObject_CallMethod(
				1121	text, "replace", "ss", "\n", self->writenl);
				1122	Py_DECREF(text);
				1123	if (newtext == NULL)
				1124	return NULL;
				1125	text = newtext;
				1126	}
				1127
				1128	if (self->line_buffering &&
				1129	(haslf \|\|
				1130	findchar(PyUnicode_AS_UNICODE(text),
				1131	PyUnicode_GET_SIZE(text), '\r')))
				1132	needflush = 1;
				1133
				1134	/* XXX What if we were just reading? */
				1135	if (self->encodefunc != NULL)
				1136	b = (self->encodefunc)((PyObject ) self, text);
				1137	else
				1138	b = PyObject_CallMethodObjArgs(self->encoder,
				1139	_PyIO_str_encode, text, NULL);
				1140	Py_DECREF(text);
				1141	if (b == NULL)
				1142	return NULL;
				1143
				1144	if (self->pending_bytes == NULL) {
				1145	self->pending_bytes = PyList_New(0);
				1146	if (self->pending_bytes == NULL) {
				1147	Py_DECREF(b);
				1148	return NULL;
				1149	}
				1150	self->pending_bytes_count = 0;
				1151	}
				1152	if (PyList_Append(self->pending_bytes, b) < 0) {
				1153	Py_DECREF(b);
				1154	return NULL;
				1155	}
				1156	self->pending_bytes_count += PyBytes_GET_SIZE(b);
				1157	Py_DECREF(b);
				1158	if (self->pending_bytes_count > self->chunk_size \|\| needflush) {
				1159	if (_TextIOWrapper_writeflush(self) < 0)
				1160	return NULL;
				1161	}
				1162
				1163	if (needflush) {
				1164	ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
				1165	if (ret == NULL)
				1166	return NULL;
				1167	Py_DECREF(ret);
				1168	}
				1169
				1170	Py_CLEAR(self->snapshot);
				1171
				1172	if (self->decoder) {
				1173	ret = PyObject_CallMethod(self->decoder, "reset", NULL);
				1174	if (ret == NULL)
				1175	return NULL;
				1176	Py_DECREF(ret);
				1177	}
				1178
				1179	return PyLong_FromSsize_t(textlen);
				1180	}
				1181
				1182	/* Steal a reference to chars and store it in the decoded_char buffer;
				1183	*/
				1184	static void
				1185	TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject self, PyObject chars)
				1186	{
				1187	Py_CLEAR(self->decoded_chars);
				1188	self->decoded_chars = chars;
				1189	self->decoded_chars_used = 0;
				1190	}
				1191
				1192	static PyObject *
				1193	TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
				1194	{
				1195	PyObject *chars;
				1196	Py_ssize_t avail;
				1197
				1198	if (self->decoded_chars == NULL)
				1199	return PyUnicode_FromStringAndSize(NULL, 0);
				1200
				1201	avail = (PyUnicode_GET_SIZE(self->decoded_chars)
				1202	- self->decoded_chars_used);
				1203
				1204	assert(avail >= 0);
				1205
				1206	if (n < 0 \|\| n > avail)
				1207	n = avail;
				1208
				1209	if (self->decoded_chars_used > 0 \|\| n < avail) {
				1210	chars = PyUnicode_FromUnicode(
				1211	PyUnicode_AS_UNICODE(self->decoded_chars)
				1212	+ self->decoded_chars_used, n);
				1213	if (chars == NULL)
				1214	return NULL;
				1215	}
				1216	else {
				1217	chars = self->decoded_chars;
				1218	Py_INCREF(chars);
				1219	}
				1220
				1221	self->decoded_chars_used += n;
				1222	return chars;
				1223	}
				1224
				1225	/* Read and decode the next chunk of data from the BufferedReader.
				1226	*/
				1227	static int
				1228	TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
				1229	{
				1230	PyObject *dec_buffer = NULL;
				1231	PyObject *dec_flags = NULL;
				1232	PyObject *input_chunk = NULL;
				1233	PyObject decoded_chars, chunk_size;
				1234	int eof;
				1235
				1236	/* The return value is True unless EOF was reached. The decoded string is
				1237	* placed in self._decoded_chars (replacing its previous value). The
				1238	* entire input chunk is sent to the decoder, though some of it may remain
				1239	* buffered in the decoder, yet to be converted.
				1240	*/
				1241
				1242	if (self->decoder == NULL) {
				1243	PyErr_SetString(PyExc_ValueError, "no decoder");
				1244	return -1;
				1245	}
				1246
				1247	if (self->telling) {
				1248	/* To prepare for tell(), we need to snapshot a point in the file
				1249	* where the decoder's input buffer is empty.
				1250	*/
				1251
				1252	PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
				1253	_PyIO_str_getstate, NULL);
				1254	if (state == NULL)
				1255	return -1;
				1256	/* Given this, we know there was a valid snapshot point
				1257	* len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
				1258	*/
				1259	if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
				1260	Py_DECREF(state);
				1261	return -1;
				1262	}
				1263	Py_INCREF(dec_buffer);
				1264	Py_INCREF(dec_flags);
				1265	Py_DECREF(state);
				1266	}
				1267
				1268	/* Read a chunk, decode it, and put the result in self._decoded_chars. */
				1269	chunk_size = PyLong_FromSsize_t(self->chunk_size);
				1270	if (chunk_size == NULL)
				1271	goto fail;
				1272	input_chunk = PyObject_CallMethodObjArgs(self->buffer,
				1273	_PyIO_str_read1, chunk_size, NULL);
				1274	Py_DECREF(chunk_size);
				1275	if (input_chunk == NULL)
				1276	goto fail;
				1277	assert(PyBytes_Check(input_chunk));
				1278
				1279	eof = (PyBytes_Size(input_chunk) == 0);
				1280
				1281	if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
				1282	decoded_chars = _PyIncrementalNewlineDecoder_decode(
				1283	self->decoder, input_chunk, eof);
				1284	}
				1285	else {
				1286	decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
				1287	_PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
				1288	}
				1289
				1290	/* TODO sanity check: isinstance(decoded_chars, unicode) */
				1291	if (decoded_chars == NULL)
				1292	goto fail;
				1293	TextIOWrapper_set_decoded_chars(self, decoded_chars);
				1294	if (PyUnicode_GET_SIZE(decoded_chars) > 0)
				1295	eof = 0;
				1296
				1297	if (self->telling) {
				1298	/* At the snapshot point, len(dec_buffer) bytes before the read, the
				1299	* next input to be decoded is dec_buffer + input_chunk.
				1300	*/
				1301	PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
				1302	if (next_input == NULL)
				1303	goto fail;
				1304	assert (PyBytes_Check(next_input));
				1305	Py_DECREF(dec_buffer);
				1306	Py_CLEAR(self->snapshot);
				1307	self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
				1308	}
				1309	Py_DECREF(input_chunk);
				1310
				1311	return (eof == 0);
				1312
				1313	fail:
				1314	Py_XDECREF(dec_buffer);
				1315	Py_XDECREF(dec_flags);
				1316	Py_XDECREF(input_chunk);
				1317	return -1;
				1318	}
				1319
				1320	static PyObject *
				1321	TextIOWrapper_read(PyTextIOWrapperObject self, PyObject args)
				1322	{
				1323	Py_ssize_t n = -1;
				1324	PyObject result = NULL, chunks = NULL;
				1325
				1326	CHECK_INITIALIZED(self);
				1327
				1328	if (!PyArg_ParseTuple(args, "\|n:read", &n))
				1329	return NULL;
				1330
				1331	CHECK_CLOSED(self);
				1332
				1333	if (_TextIOWrapper_writeflush(self) < 0)
				1334	return NULL;
				1335
				1336	if (n < 0) {
				1337	/* Read everything */
				1338	PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
				1339	PyObject *decoded;
				1340	if (bytes == NULL)
				1341	goto fail;
				1342	decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
				1343	bytes, Py_True, NULL);
				1344	Py_DECREF(bytes);
				1345	if (decoded == NULL)
				1346	goto fail;
				1347
				1348	result = TextIOWrapper_get_decoded_chars(self, -1);
				1349
				1350	if (result == NULL) {
				1351	Py_DECREF(decoded);
				1352	return NULL;
				1353	}
				1354
				1355	PyUnicode_AppendAndDel(&result, decoded);
				1356	if (result == NULL)
				1357	goto fail;
				1358
				1359	Py_CLEAR(self->snapshot);
				1360	return result;
				1361	}
				1362	else {
				1363	int res = 1;
				1364	Py_ssize_t remaining = n;
				1365
				1366	result = TextIOWrapper_get_decoded_chars(self, n);
				1367	if (result == NULL)
				1368	goto fail;
				1369	remaining -= PyUnicode_GET_SIZE(result);
				1370
				1371	/* Keep reading chunks until we have n characters to return */
				1372	while (remaining > 0) {
				1373	res = TextIOWrapper_read_chunk(self);
				1374	if (res < 0)
				1375	goto fail;
				1376	if (res == 0) /* EOF */
				1377	break;
				1378	if (chunks == NULL) {
				1379	chunks = PyList_New(0);
				1380	if (chunks == NULL)
				1381	goto fail;
				1382	}
				1383	if (PyList_Append(chunks, result) < 0)
				1384	goto fail;
				1385	Py_DECREF(result);
				1386	result = TextIOWrapper_get_decoded_chars(self, remaining);
				1387	if (result == NULL)
				1388	goto fail;
				1389	remaining -= PyUnicode_GET_SIZE(result);
				1390	}
				1391	if (chunks != NULL) {
				1392	if (result != NULL && PyList_Append(chunks, result) < 0)
				1393	goto fail;
				1394	Py_CLEAR(result);
				1395	result = PyUnicode_Join(_PyIO_empty_str, chunks);
				1396	if (result == NULL)
				1397	goto fail;
				1398	Py_CLEAR(chunks);
				1399	}
				1400	return result;
				1401	}
				1402	fail:
				1403	Py_XDECREF(result);
				1404	Py_XDECREF(chunks);
				1405	return NULL;
				1406	}
				1407
				1408
				1409	/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
				1410	that is to the NUL character. Otherwise the function will produce
				1411	incorrect results. */
				1412	static Py_UNICODE *
				1413	find_control_char(Py_UNICODE start, Py_UNICODE end, Py_UNICODE ch)
				1414	{
				1415	Py_UNICODE *s = start;
				1416	for (;;) {
				1417	while (*s > ch)
				1418	s++;
				1419	if (*s == ch)
				1420	return s;
				1421	if (s == end)
				1422	return NULL;
				1423	s++;
				1424	}
				1425	}
				1426
				1427	Py_ssize_t
				1428	_PyIO_find_line_ending(
				1429	int translated, int universal, PyObject *readnl,
				1430	Py_UNICODE start, Py_UNICODE end, Py_ssize_t *consumed)
				1431	{
				1432	Py_ssize_t len = end - start;
				1433
				1434	if (translated) {
				1435	/* Newlines are already translated, only search for \n */
				1436	Py_UNICODE *pos = find_control_char(start, end, '\n');
				1437	if (pos != NULL)
				1438	return pos - start + 1;
				1439	else {
				1440	*consumed = len;
				1441	return -1;
				1442	}
				1443	}
				1444	else if (universal) {
				1445	/* Universal newline search. Find any of \r, \r\n, \n
				1446	* The decoder ensures that \r\n are not split in two pieces
				1447	*/
				1448	Py_UNICODE *s = start;
				1449	for (;;) {
				1450	Py_UNICODE ch;
				1451	/* Fast path for non-control chars. The loop always ends
				1452	since the Py_UNICODE storage is NUL-terminated. */
				1453	while (*s > '\r')
				1454	s++;
				1455	if (s >= end) {
				1456	*consumed = len;
				1457	return -1;
				1458	}
				1459	ch = *s++;
				1460	if (ch == '\n')
				1461	return s - start;
				1462	if (ch == '\r') {
				1463	if (*s == '\n')
				1464	return s - start + 1;
				1465	else
				1466	return s - start;
				1467	}
				1468	}
				1469	}
				1470	else {
				1471	/* Non-universal mode. */
				1472	Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
				1473	Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
				1474	if (readnl_len == 1) {
				1475	Py_UNICODE *pos = find_control_char(start, end, nl[0]);
				1476	if (pos != NULL)
				1477	return pos - start + 1;
				1478	*consumed = len;
				1479	return -1;
				1480	}
				1481	else {
				1482	Py_UNICODE *s = start;
				1483	Py_UNICODE *e = end - readnl_len + 1;
				1484	Py_UNICODE *pos;
				1485	if (e < s)
				1486	e = s;
				1487	while (s < e) {
				1488	Py_ssize_t i;
				1489	Py_UNICODE *pos = find_control_char(s, end, nl[0]);
				1490	if (pos == NULL \|\| pos >= e)
				1491	break;
				1492	for (i = 1; i < readnl_len; i++) {
				1493	if (pos[i] != nl[i])
				1494	break;
				1495	}
				1496	if (i == readnl_len)
				1497	return pos - start + readnl_len;
				1498	s = pos + 1;
				1499	}
				1500	pos = find_control_char(e, end, nl[0]);
				1501	if (pos == NULL)
				1502	*consumed = len;
				1503	else
				1504	*consumed = pos - start;
				1505	return -1;
				1506	}
				1507	}
				1508	}
				1509
				1510	static PyObject *
				1511	_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
				1512	{
				1513	PyObject line = NULL, chunks = NULL, *remaining = NULL;
				1514	Py_ssize_t start, endpos, chunked, offset_to_buffer;
				1515	int res;
				1516
				1517	CHECK_CLOSED(self);
				1518
				1519	if (_TextIOWrapper_writeflush(self) < 0)
				1520	return NULL;
				1521
				1522	chunked = 0;
				1523
				1524	while (1) {
				1525	Py_UNICODE *ptr;
				1526	Py_ssize_t line_len;
				1527	Py_ssize_t consumed = 0;
				1528
				1529	/* First, get some data if necessary */
				1530	res = 1;
				1531	while (!self->decoded_chars \|\|
				1532	!PyUnicode_GET_SIZE(self->decoded_chars)) {
				1533	res = TextIOWrapper_read_chunk(self);
				1534	if (res < 0)
				1535	goto error;
				1536	if (res == 0)
				1537	break;
				1538	}
				1539	if (res == 0) {
				1540	/* end of file */
				1541	TextIOWrapper_set_decoded_chars(self, NULL);
				1542	Py_CLEAR(self->snapshot);
				1543	start = endpos = offset_to_buffer = 0;
				1544	break;
				1545	}
				1546
				1547	if (remaining == NULL) {
				1548	line = self->decoded_chars;
				1549	start = self->decoded_chars_used;
				1550	offset_to_buffer = 0;
				1551	Py_INCREF(line);
				1552	}
				1553	else {
				1554	assert(self->decoded_chars_used == 0);
				1555	line = PyUnicode_Concat(remaining, self->decoded_chars);
				1556	start = 0;
				1557	offset_to_buffer = PyUnicode_GET_SIZE(remaining);
				1558	Py_CLEAR(remaining);
				1559	if (line == NULL)
				1560	goto error;
				1561	}
				1562
				1563	ptr = PyUnicode_AS_UNICODE(line);
				1564	line_len = PyUnicode_GET_SIZE(line);
				1565
				1566	endpos = _PyIO_find_line_ending(
				1567	self->readtranslate, self->readuniversal, self->readnl,
				1568	ptr + start, ptr + line_len, &consumed);
				1569	if (endpos >= 0) {
				1570	endpos += start;
				1571	if (limit >= 0 && (endpos - start) + chunked >= limit)
				1572	endpos = start + limit - chunked;
				1573	break;
				1574	}
				1575
				1576	/* We can put aside up to `endpos` */
				1577	endpos = consumed + start;
				1578	if (limit >= 0 && (endpos - start) + chunked >= limit) {
				1579	/* Didn't find line ending, but reached length limit */
				1580	endpos = start + limit - chunked;
				1581	break;
				1582	}
				1583
				1584	if (endpos > start) {
				1585	/* No line ending seen yet - put aside current data */
				1586	PyObject *s;
				1587	if (chunks == NULL) {
				1588	chunks = PyList_New(0);
				1589	if (chunks == NULL)
				1590	goto error;
				1591	}
				1592	s = PyUnicode_FromUnicode(ptr + start, endpos - start);
				1593	if (s == NULL)
				1594	goto error;
				1595	if (PyList_Append(chunks, s) < 0) {
				1596	Py_DECREF(s);
				1597	goto error;
				1598	}
				1599	chunked += PyUnicode_GET_SIZE(s);
				1600	Py_DECREF(s);
				1601	}
				1602	/* There may be some remaining bytes we'll have to prepend to the
				1603	next chunk of data */
				1604	if (endpos < line_len) {
				1605	remaining = PyUnicode_FromUnicode(
				1606	ptr + endpos, line_len - endpos);
				1607	if (remaining == NULL)
				1608	goto error;
				1609	}
				1610	Py_CLEAR(line);
				1611	/* We have consumed the buffer */
				1612	TextIOWrapper_set_decoded_chars(self, NULL);
				1613	}
				1614
				1615	if (line != NULL) {
				1616	/* Our line ends in the current buffer */
				1617	self->decoded_chars_used = endpos - offset_to_buffer;
				1618	if (start > 0 \|\| endpos < PyUnicode_GET_SIZE(line)) {
				1619	if (start == 0 && Py_REFCNT(line) == 1) {
				1620	if (PyUnicode_Resize(&line, endpos) < 0)
				1621	goto error;
				1622	}
				1623	else {
				1624	PyObject *s = PyUnicode_FromUnicode(
				1625	PyUnicode_AS_UNICODE(line) + start, endpos - start);
				1626	Py_CLEAR(line);
				1627	if (s == NULL)
				1628	goto error;
				1629	line = s;
				1630	}
				1631	}
				1632	}
				1633	if (remaining != NULL) {
				1634	if (chunks == NULL) {
				1635	chunks = PyList_New(0);
				1636	if (chunks == NULL)
				1637	goto error;
				1638	}
				1639	if (PyList_Append(chunks, remaining) < 0)
				1640	goto error;
				1641	Py_CLEAR(remaining);
				1642	}
				1643	if (chunks != NULL) {
				1644	if (line != NULL && PyList_Append(chunks, line) < 0)
				1645	goto error;
				1646	Py_CLEAR(line);
				1647	line = PyUnicode_Join(_PyIO_empty_str, chunks);
				1648	if (line == NULL)
				1649	goto error;
				1650	Py_DECREF(chunks);
				1651	}
				1652	if (line == NULL)
				1653	line = PyUnicode_FromStringAndSize(NULL, 0);
				1654
				1655	return line;
				1656
				1657	error:
				1658	Py_XDECREF(chunks);
				1659	Py_XDECREF(remaining);
				1660	Py_XDECREF(line);
				1661	return NULL;
				1662	}
				1663
				1664	static PyObject *
				1665	TextIOWrapper_readline(PyTextIOWrapperObject self, PyObject args)
				1666	{
				1667	Py_ssize_t limit = -1;
				1668
				1669	CHECK_INITIALIZED(self);
				1670	if (!PyArg_ParseTuple(args, "\|n:readline", &limit)) {
				1671	return NULL;
				1672	}
				1673	return _TextIOWrapper_readline(self, limit);
				1674	}
				1675
				1676	/* Seek and Tell */
				1677
				1678	typedef struct {
				1679	Py_off_t start_pos;
				1680	int dec_flags;
				1681	int bytes_to_feed;
				1682	int chars_to_skip;
				1683	char need_eof;
				1684	} CookieStruct;
				1685
				1686	/*
				1687	To speed up cookie packing/unpacking, we store the fields in a temporary
				1688	string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
				1689	The following macros define at which offsets in the intermediary byte
				1690	string the various CookieStruct fields will be stored.
				1691	*/
				1692
				1693	#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
				1694
				1695	#if defined(WORDS_BIGENDIAN)
				1696
				1697	# define IS_LITTLE_ENDIAN 0
				1698
				1699	/* We want the least significant byte of start_pos to also be the least
				1700	significant byte of the cookie, which means that in big-endian mode we
				1701	must copy the fields in reverse order. */
				1702
				1703	# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
				1704	# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
				1705	# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
				1706	# define OFF_CHARS_TO_SKIP (sizeof(char))
				1707	# define OFF_NEED_EOF 0
				1708
				1709	#else
				1710
				1711	# define IS_LITTLE_ENDIAN 1
				1712
				1713	/* Little-endian mode: the least significant byte of start_pos will
				1714	naturally end up the least significant byte of the cookie. */
				1715
				1716	# define OFF_START_POS 0
				1717	# define OFF_DEC_FLAGS (sizeof(Py_off_t))
				1718	# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
				1719	# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
				1720	# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
				1721
				1722	#endif
				1723
				1724	static int
				1725	TextIOWrapper_parseCookie(CookieStruct cookie, PyObject cookieObj)
				1726	{
				1727	unsigned char buffer[COOKIE_BUF_LEN];
				1728	PyLongObject cookieLong = (PyLongObject )PyNumber_Long(cookieObj);
				1729	if (cookieLong == NULL)
				1730	return -1;
				1731
				1732	if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
				1733	IS_LITTLE_ENDIAN, 0) < 0) {
				1734	Py_DECREF(cookieLong);
				1735	return -1;
				1736	}
				1737	Py_DECREF(cookieLong);
				1738
Antoine Pitrou	2db74c2	2009-03-06 21:49:02 +0000	[diff] [blame^]	1739	memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
				1740	memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
				1741	memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
				1742	memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
				1743	memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1744
				1745	return 0;
				1746	}
				1747
				1748	static PyObject *
				1749	TextIOWrapper_buildCookie(CookieStruct *cookie)
				1750	{
				1751	unsigned char buffer[COOKIE_BUF_LEN];
				1752
Antoine Pitrou	2db74c2	2009-03-06 21:49:02 +0000	[diff] [blame^]	1753	memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
				1754	memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
				1755	memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
				1756	memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
				1757	memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1758
				1759	return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
				1760	}
				1761	#undef IS_LITTLE_ENDIAN
				1762
				1763	static int
				1764	_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
				1765	CookieStruct *cookie)
				1766	{
				1767	PyObject *res;
				1768	/* When seeking to the start of the stream, we call decoder.reset()
				1769	rather than decoder.getstate().
				1770	This is for a few decoders such as utf-16 for which the state value
				1771	at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
				1772	utf-16, that we are expecting a BOM).
				1773	*/
				1774	if (cookie->start_pos == 0 && cookie->dec_flags == 0)
				1775	res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
				1776	else
				1777	res = PyObject_CallMethod(self->decoder, "setstate",
				1778	"((yi))", "", cookie->dec_flags);
				1779	if (res == NULL)
				1780	return -1;
				1781	Py_DECREF(res);
				1782	return 0;
				1783	}
				1784
				1785	static PyObject *
				1786	TextIOWrapper_seek(PyTextIOWrapperObject self, PyObject args)
				1787	{
				1788	PyObject cookieObj, posobj;
				1789	CookieStruct cookie;
				1790	int whence = 0;
				1791	static PyObject *zero = NULL;
				1792	PyObject *res;
				1793	int cmp;
				1794
				1795	CHECK_INITIALIZED(self);
				1796
				1797	if (zero == NULL) {
				1798	zero = PyLong_FromLong(0L);
				1799	if (zero == NULL)
				1800	return NULL;
				1801	}
				1802
				1803	if (!PyArg_ParseTuple(args, "O\|i:seek", &cookieObj, &whence))
				1804	return NULL;
				1805	CHECK_CLOSED(self);
				1806
				1807	Py_INCREF(cookieObj);
				1808
				1809	if (!self->seekable) {
				1810	PyErr_SetString(PyExc_IOError,
				1811	"underlying stream is not seekable");
				1812	goto fail;
				1813	}
				1814
				1815	if (whence == 1) {
				1816	/* seek relative to current position */
				1817	cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
				1818	if (cmp < 0)
				1819	goto fail;
				1820
				1821	if (cmp == 0) {
				1822	PyErr_SetString(PyExc_IOError,
				1823	"can't do nonzero cur-relative seeks");
				1824	goto fail;
				1825	}
				1826
				1827	/* Seeking to the current position should attempt to
				1828	* sync the underlying buffer with the current position.
				1829	*/
				1830	Py_DECREF(cookieObj);
				1831	cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
				1832	if (cookieObj == NULL)
				1833	goto fail;
				1834	}
				1835	else if (whence == 2) {
				1836	/* seek relative to end of file */
				1837
				1838	cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
				1839	if (cmp < 0)
				1840	goto fail;
				1841
				1842	if (cmp == 0) {
				1843	PyErr_SetString(PyExc_IOError,
				1844	"can't do nonzero end-relative seeks");
				1845	goto fail;
				1846	}
				1847
				1848	res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
				1849	if (res == NULL)
				1850	goto fail;
				1851	Py_DECREF(res);
				1852
				1853	TextIOWrapper_set_decoded_chars(self, NULL);
				1854	Py_CLEAR(self->snapshot);
				1855	if (self->decoder) {
				1856	res = PyObject_CallMethod(self->decoder, "reset", NULL);
				1857	if (res == NULL)
				1858	goto fail;
				1859	Py_DECREF(res);
				1860	}
				1861
				1862	res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
				1863	Py_XDECREF(cookieObj);
				1864	return res;
				1865	}
				1866	else if (whence != 0) {
				1867	PyErr_Format(PyExc_ValueError,
				1868	"invalid whence (%d, should be 0, 1 or 2)", whence);
				1869	goto fail;
				1870	}
				1871
				1872	cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
				1873	if (cmp < 0)
				1874	goto fail;
				1875
				1876	if (cmp == 1) {
				1877	PyErr_Format(PyExc_ValueError,
				1878	"negative seek position %R", cookieObj);
				1879	goto fail;
				1880	}
				1881
				1882	res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
				1883	if (res == NULL)
				1884	goto fail;
				1885	Py_DECREF(res);
				1886
				1887	/* The strategy of seek() is to go back to the safe start point
				1888	* and replay the effect of read(chars_to_skip) from there.
				1889	*/
				1890	if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
				1891	goto fail;
				1892
				1893	/* Seek back to the safe start point. */
				1894	posobj = PyLong_FromOff_t(cookie.start_pos);
				1895	if (posobj == NULL)
				1896	goto fail;
				1897	res = PyObject_CallMethodObjArgs(self->buffer,
				1898	_PyIO_str_seek, posobj, NULL);
				1899	Py_DECREF(posobj);
				1900	if (res == NULL)
				1901	goto fail;
				1902	Py_DECREF(res);
				1903
				1904	TextIOWrapper_set_decoded_chars(self, NULL);
				1905	Py_CLEAR(self->snapshot);
				1906
				1907	/* Restore the decoder to its state from the safe start point. */
				1908	if (self->decoder) {
				1909	if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
				1910	goto fail;
				1911	}
				1912
				1913	if (cookie.chars_to_skip) {
				1914	/* Just like _read_chunk, feed the decoder and save a snapshot. */
				1915	PyObject *input_chunk = PyObject_CallMethod(
				1916	self->buffer, "read", "i", cookie.bytes_to_feed);
				1917	PyObject *decoded;
				1918
				1919	if (input_chunk == NULL)
				1920	goto fail;
				1921
				1922	assert (PyBytes_Check(input_chunk));
				1923
				1924	self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
				1925	if (self->snapshot == NULL) {
				1926	Py_DECREF(input_chunk);
				1927	goto fail;
				1928	}
				1929
				1930	decoded = PyObject_CallMethod(self->decoder, "decode",
				1931	"Oi", input_chunk, (int)cookie.need_eof);
				1932
				1933	if (decoded == NULL)
				1934	goto fail;
				1935
				1936	TextIOWrapper_set_decoded_chars(self, decoded);
				1937
				1938	/* Skip chars_to_skip of the decoded characters. */
				1939	if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
				1940	PyErr_SetString(PyExc_IOError, "can't restore logical file position");
				1941	goto fail;
				1942	}
				1943	self->decoded_chars_used = cookie.chars_to_skip;
				1944	}
				1945	else {
				1946	self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
				1947	if (self->snapshot == NULL)
				1948	goto fail;
				1949	}
				1950
				1951	return cookieObj;
				1952	fail:
				1953	Py_XDECREF(cookieObj);
				1954	return NULL;
				1955
				1956	}
				1957
				1958	static PyObject *
				1959	TextIOWrapper_tell(PyTextIOWrapperObject self, PyObject args)
				1960	{
				1961	PyObject *res;
				1962	PyObject *posobj = NULL;
				1963	CookieStruct cookie = {0,0,0,0,0};
				1964	PyObject *next_input;
				1965	Py_ssize_t chars_to_skip, chars_decoded;
				1966	PyObject *saved_state = NULL;
				1967	char input, input_end;
				1968
				1969	CHECK_INITIALIZED(self);
				1970	CHECK_CLOSED(self);
				1971
				1972	if (!self->seekable) {
				1973	PyErr_SetString(PyExc_IOError,
				1974	"underlying stream is not seekable");
				1975	goto fail;
				1976	}
				1977	if (!self->telling) {
				1978	PyErr_SetString(PyExc_IOError,
				1979	"telling position disabled by next() call");
				1980	goto fail;
				1981	}
				1982
				1983	if (_TextIOWrapper_writeflush(self) < 0)
				1984	return NULL;
				1985	res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
				1986	if (res == NULL)
				1987	goto fail;
				1988	Py_DECREF(res);
				1989
				1990	posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
				1991	if (posobj == NULL)
				1992	goto fail;
				1993
				1994	if (self->decoder == NULL \|\| self->snapshot == NULL) {
				1995	assert (self->decoded_chars == NULL \|\| PyUnicode_GetSize(self->decoded_chars) == 0);
				1996	return posobj;
				1997	}
				1998
				1999	#if defined(HAVE_LARGEFILE_SUPPORT)
				2000	cookie.start_pos = PyLong_AsLongLong(posobj);
				2001	#else
				2002	cookie.start_pos = PyLong_AsLong(posobj);
				2003	#endif
				2004	if (PyErr_Occurred())
				2005	goto fail;
				2006
				2007	/* Skip backward to the snapshot point (see _read_chunk). */
				2008	if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
				2009	goto fail;
				2010
				2011	assert (PyBytes_Check(next_input));
				2012
				2013	cookie.start_pos -= PyBytes_GET_SIZE(next_input);
				2014
				2015	/* How many decoded characters have been used up since the snapshot? */
				2016	if (self->decoded_chars_used == 0) {
				2017	/* We haven't moved from the snapshot point. */
				2018	Py_DECREF(posobj);
				2019	return TextIOWrapper_buildCookie(&cookie);
				2020	}
				2021
				2022	chars_to_skip = self->decoded_chars_used;
				2023
				2024	/* Starting from the snapshot position, we will walk the decoder
				2025	* forward until it gives us enough decoded characters.
				2026	*/
				2027	saved_state = PyObject_CallMethodObjArgs(self->decoder,
				2028	_PyIO_str_getstate, NULL);
				2029	if (saved_state == NULL)
				2030	goto fail;
				2031
				2032	/* Note our initial start point. */
				2033	if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
				2034	goto fail;
				2035
				2036	/* Feed the decoder one byte at a time. As we go, note the
				2037	* nearest "safe start point" before the current location
				2038	* (a point where the decoder has nothing buffered, so seek()
				2039	* can safely start from there and advance to this location).
				2040	*/
				2041	chars_decoded = 0;
				2042	input = PyBytes_AS_STRING(next_input);
				2043	input_end = input + PyBytes_GET_SIZE(next_input);
				2044	while (input < input_end) {
				2045	PyObject *state;
				2046	char *dec_buffer;
				2047	Py_ssize_t dec_buffer_len;
				2048	int dec_flags;
				2049
				2050	PyObject *decoded = PyObject_CallMethod(
				2051	self->decoder, "decode", "y#", input, 1);
				2052	if (decoded == NULL)
				2053	goto fail;
				2054	assert (PyUnicode_Check(decoded));
				2055	chars_decoded += PyUnicode_GET_SIZE(decoded);
				2056	Py_DECREF(decoded);
				2057
				2058	cookie.bytes_to_feed += 1;
				2059
				2060	state = PyObject_CallMethodObjArgs(self->decoder,
				2061	_PyIO_str_getstate, NULL);
				2062	if (state == NULL)
				2063	goto fail;
				2064	if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
				2065	Py_DECREF(state);
				2066	goto fail;
				2067	}
				2068	Py_DECREF(state);
				2069
				2070	if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
				2071	/* Decoder buffer is empty, so this is a safe start point. */
				2072	cookie.start_pos += cookie.bytes_to_feed;
				2073	chars_to_skip -= chars_decoded;
				2074	cookie.dec_flags = dec_flags;
				2075	cookie.bytes_to_feed = 0;
				2076	chars_decoded = 0;
				2077	}
				2078	if (chars_decoded >= chars_to_skip)
				2079	break;
				2080	input++;
				2081	}
				2082	if (input == input_end) {
				2083	/* We didn't get enough decoded data; signal EOF to get more. */
				2084	PyObject *decoded = PyObject_CallMethod(
				2085	self->decoder, "decode", "yi", "", /* final = */ 1);
				2086	if (decoded == NULL)
				2087	goto fail;
				2088	assert (PyUnicode_Check(decoded));
				2089	chars_decoded += PyUnicode_GET_SIZE(decoded);
				2090	Py_DECREF(decoded);
				2091	cookie.need_eof = 1;
				2092
				2093	if (chars_decoded < chars_to_skip) {
				2094	PyErr_SetString(PyExc_IOError,
				2095	"can't reconstruct logical file position");
				2096	goto fail;
				2097	}
				2098	}
				2099
				2100	/* finally */
				2101	Py_XDECREF(posobj);
				2102	res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
				2103	Py_DECREF(saved_state);
				2104	if (res == NULL)
				2105	return NULL;
				2106	Py_DECREF(res);
				2107
				2108	/* The returned cookie corresponds to the last safe start point. */
				2109	cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
				2110	return TextIOWrapper_buildCookie(&cookie);
				2111
				2112	fail:
				2113	Py_XDECREF(posobj);
				2114	if (saved_state) {
				2115	PyObject type, value, *traceback;
				2116	PyErr_Fetch(&type, &value, &traceback);
				2117
				2118	res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
				2119	Py_DECREF(saved_state);
				2120	if (res == NULL)
				2121	return NULL;
				2122	Py_DECREF(res);
				2123
				2124	PyErr_Restore(type, value, traceback);
				2125	}
				2126	return NULL;
				2127	}
				2128
				2129	static PyObject *
				2130	TextIOWrapper_truncate(PyTextIOWrapperObject self, PyObject args)
				2131	{
				2132	PyObject *pos = Py_None;
				2133	PyObject *res;
				2134
				2135	CHECK_INITIALIZED(self)
				2136	if (!PyArg_ParseTuple(args, "\|O:truncate", &pos)) {
				2137	return NULL;
				2138	}
				2139
				2140	res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
				2141	if (res == NULL)
				2142	return NULL;
				2143	Py_DECREF(res);
				2144
				2145	if (pos != Py_None) {
				2146	res = PyObject_CallMethodObjArgs((PyObject *) self,
				2147	_PyIO_str_seek, pos, NULL);
				2148	if (res == NULL)
				2149	return NULL;
				2150	Py_DECREF(res);
				2151	}
				2152
				2153	return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
				2154	}
				2155
				2156	/* Inquiries */
				2157
				2158	static PyObject *
				2159	TextIOWrapper_fileno(PyTextIOWrapperObject self, PyObject args)
				2160	{
				2161	CHECK_INITIALIZED(self);
				2162	return PyObject_CallMethod(self->buffer, "fileno", NULL);
				2163	}
				2164
				2165	static PyObject *
				2166	TextIOWrapper_seekable(PyTextIOWrapperObject self, PyObject args)
				2167	{
				2168	CHECK_INITIALIZED(self);
				2169	return PyObject_CallMethod(self->buffer, "seekable", NULL);
				2170	}
				2171
				2172	static PyObject *
				2173	TextIOWrapper_readable(PyTextIOWrapperObject self, PyObject args)
				2174	{
				2175	CHECK_INITIALIZED(self);
				2176	return PyObject_CallMethod(self->buffer, "readable", NULL);
				2177	}
				2178
				2179	static PyObject *
				2180	TextIOWrapper_writable(PyTextIOWrapperObject self, PyObject args)
				2181	{
				2182	CHECK_INITIALIZED(self);
				2183	return PyObject_CallMethod(self->buffer, "writable", NULL);
				2184	}
				2185
				2186	static PyObject *
				2187	TextIOWrapper_isatty(PyTextIOWrapperObject self, PyObject args)
				2188	{
				2189	CHECK_INITIALIZED(self);
				2190	return PyObject_CallMethod(self->buffer, "isatty", NULL);
				2191	}
				2192
				2193	static PyObject *
				2194	TextIOWrapper_flush(PyTextIOWrapperObject self, PyObject args)
				2195	{
				2196	CHECK_INITIALIZED(self);
				2197	CHECK_CLOSED(self);
				2198	self->telling = self->seekable;
				2199	if (_TextIOWrapper_writeflush(self) < 0)
				2200	return NULL;
				2201	return PyObject_CallMethod(self->buffer, "flush", NULL);
				2202	}
				2203
				2204	static PyObject *
				2205	TextIOWrapper_close(PyTextIOWrapperObject self, PyObject args)
				2206	{
				2207	PyObject *res;
				2208	CHECK_INITIALIZED(self);
				2209	res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
				2210	if (res == NULL) {
				2211	/* If flush() fails, just give up */
				2212	PyErr_Clear();
				2213	}
				2214	else
				2215	Py_DECREF(res);
				2216
				2217	return PyObject_CallMethod(self->buffer, "close", NULL);
				2218	}
				2219
				2220	static PyObject *
				2221	TextIOWrapper_iternext(PyTextIOWrapperObject *self)
				2222	{
				2223	PyObject *line;
				2224
				2225	CHECK_INITIALIZED(self);
				2226
				2227	self->telling = 0;
				2228	if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
				2229	/* Skip method call overhead for speed */
				2230	line = _TextIOWrapper_readline(self, -1);
				2231	}
				2232	else {
				2233	line = PyObject_CallMethodObjArgs((PyObject *)self,
				2234	_PyIO_str_readline, NULL);
				2235	if (line && !PyUnicode_Check(line)) {
				2236	PyErr_Format(PyExc_IOError,
				2237	"readline() should have returned an str object, "
				2238	"not '%.200s'", Py_TYPE(line)->tp_name);
				2239	Py_DECREF(line);
				2240	return NULL;
				2241	}
				2242	}
				2243
				2244	if (line == NULL)
				2245	return NULL;
				2246
				2247	if (PyUnicode_GET_SIZE(line) == 0) {
				2248	/* Reached EOF or would have blocked */
				2249	Py_DECREF(line);
				2250	Py_CLEAR(self->snapshot);
				2251	self->telling = self->seekable;
				2252	return NULL;
				2253	}
				2254
				2255	return line;
				2256	}
				2257
				2258	static PyObject *
				2259	TextIOWrapper_name_get(PyTextIOWrapperObject self, void context)
				2260	{
				2261	CHECK_INITIALIZED(self);
				2262	return PyObject_GetAttrString(self->buffer, "name");
				2263	}
				2264
				2265	static PyObject *
				2266	TextIOWrapper_closed_get(PyTextIOWrapperObject self, void context)
				2267	{
				2268	CHECK_INITIALIZED(self);
				2269	return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
				2270	}
				2271
				2272	static PyObject *
				2273	TextIOWrapper_newlines_get(PyTextIOWrapperObject self, void context)
				2274	{
				2275	PyObject *res;
				2276	CHECK_INITIALIZED(self);
				2277	if (self->decoder == NULL)
				2278	Py_RETURN_NONE;
				2279	res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
				2280	if (res == NULL) {
				2281	PyErr_Clear();
				2282	Py_RETURN_NONE;
				2283	}
				2284	return res;
				2285	}
				2286
				2287	static PyObject *
				2288	TextIOWrapper_chunk_size_get(PyTextIOWrapperObject self, void context)
				2289	{
				2290	CHECK_INITIALIZED(self);
				2291	return PyLong_FromSsize_t(self->chunk_size);
				2292	}
				2293
				2294	static int
				2295	TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
				2296	PyObject arg, void context)
				2297	{
				2298	Py_ssize_t n;
				2299	CHECK_INITIALIZED_INT(self);
				2300	n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
				2301	if (n == -1 && PyErr_Occurred())
				2302	return -1;
				2303	if (n <= 0) {
				2304	PyErr_SetString(PyExc_ValueError,
				2305	"a strictly positive integer is required");
				2306	return -1;
				2307	}
				2308	self->chunk_size = n;
				2309	return 0;
				2310	}
				2311
				2312	static PyMethodDef TextIOWrapper_methods[] = {
				2313	{"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
				2314	{"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
				2315	{"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
				2316	{"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
				2317	{"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
				2318
				2319	{"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
				2320	{"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
				2321	{"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
				2322	{"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
				2323	{"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
				2324
				2325	{"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
				2326	{"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
				2327	{"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
				2328	{NULL, NULL}
				2329	};
				2330
				2331	static PyMemberDef TextIOWrapper_members[] = {
				2332	{"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
				2333	{"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
				2334	{"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
				2335	{NULL}
				2336	};
				2337
				2338	static PyGetSetDef TextIOWrapper_getset[] = {
				2339	{"name", (getter)TextIOWrapper_name_get, NULL, NULL},
				2340	{"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
				2341	/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
				2342	*/
				2343	{"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
				2344	{"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
				2345	(setter)TextIOWrapper_chunk_size_set, NULL},
				2346	{0}
				2347	};
				2348
				2349	PyTypeObject PyTextIOWrapper_Type = {
				2350	PyVarObject_HEAD_INIT(NULL, 0)
				2351	"_io.TextIOWrapper", /tp_name/
				2352	sizeof(PyTextIOWrapperObject), /tp_basicsize/
				2353	0, /tp_itemsize/
				2354	(destructor)TextIOWrapper_dealloc, /tp_dealloc/
				2355	0, /tp_print/
				2356	0, /tp_getattr/
				2357	0, /tp_setattr/
				2358	0, /tp_compare /
				2359	0, /tp_repr/
				2360	0, /tp_as_number/
				2361	0, /tp_as_sequence/
				2362	0, /tp_as_mapping/
				2363	0, /tp_hash /
				2364	0, /tp_call/
				2365	0, /tp_str/
				2366	0, /tp_getattro/
				2367	0, /tp_setattro/
				2368	0, /tp_as_buffer/
				2369	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE
				2370	\| Py_TPFLAGS_HAVE_GC, /tp_flags/
				2371	TextIOWrapper_doc, /* tp_doc */
				2372	(traverseproc)TextIOWrapper_traverse, /* tp_traverse */
				2373	(inquiry)TextIOWrapper_clear, /* tp_clear */
				2374	0, /* tp_richcompare */
				2375	offsetof(PyTextIOWrapperObject, weakreflist), /tp_weaklistoffset/
				2376	0, /* tp_iter */
				2377	(iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
				2378	TextIOWrapper_methods, /* tp_methods */
				2379	TextIOWrapper_members, /* tp_members */
				2380	TextIOWrapper_getset, /* tp_getset */
				2381	0, /* tp_base */
				2382	0, /* tp_dict */
				2383	0, /* tp_descr_get */
				2384	0, /* tp_descr_set */
				2385	offsetof(PyTextIOWrapperObject, dict), /tp_dictoffset/
				2386	(initproc)TextIOWrapper_init, /* tp_init */
				2387	0, /* tp_alloc */
				2388	PyType_GenericNew, /* tp_new */
				2389	};