Blame - Modules/_io/textio.c - platform/external/python/cpython3

blob: cc229a8562f679d6522f812ad38420ff5e7be3f7 [file] [log] [blame]

/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
				8
				9	#define PY_SSIZE_T_CLEAN
				10	#include "Python.h"
				11	#include "structmember.h"
				12	#include "_iomodule.h"
				13
				14	/* TextIOBase */
				15
				16	PyDoc_STRVAR(TextIOBase_doc,
				17	"Base class for text I/O.\n"
				18	"\n"
				19	"This class provides a character and line based interface to stream\n"
				20	"I/O. There is no readinto method because Python's character strings\n"
				21	"are immutable. There is no public constructor.\n"
				22	);
				23
				24	static PyObject *
				25	_unsupported(const char *message)
				26	{
				27	PyErr_SetString(IO_STATE->unsupported_operation, message);
				28	return NULL;
				29	}
				30
				31	PyDoc_STRVAR(TextIOBase_read_doc,
				32	"Read at most n characters from stream.\n"
				33	"\n"
				34	"Read from underlying buffer until we have n characters or we hit EOF.\n"
				35	"If n is negative or omitted, read until EOF.\n"
				36	);
				37
				38	static PyObject *
				39	TextIOBase_read(PyObject self, PyObject args)
				40	{
				41	return _unsupported("read");
				42	}
				43
				44	PyDoc_STRVAR(TextIOBase_readline_doc,
				45	"Read until newline or EOF.\n"
				46	"\n"
				47	"Returns an empty string if EOF is hit immediately.\n"
				48	);
				49
				50	static PyObject *
				51	TextIOBase_readline(PyObject self, PyObject args)
				52	{
				53	return _unsupported("readline");
				54	}
				55
				56	PyDoc_STRVAR(TextIOBase_write_doc,
				57	"Write string to stream.\n"
				58	"Returns the number of characters written (which is always equal to\n"
				59	"the length of the string).\n"
				60	);
				61
				62	static PyObject *
				63	TextIOBase_write(PyObject self, PyObject args)
				64	{
				65	return _unsupported("write");
				66	}
				67
				68	PyDoc_STRVAR(TextIOBase_encoding_doc,
				69	"Encoding of the text stream.\n"
				70	"\n"
				71	"Subclasses should override.\n"
				72	);
				73
				74	static PyObject *
				75	TextIOBase_encoding_get(PyObject self, void context)
				76	{
				77	Py_RETURN_NONE;
				78	}
				79
				80	PyDoc_STRVAR(TextIOBase_newlines_doc,
				81	"Line endings translated so far.\n"
				82	"\n"
				83	"Only line endings translated during reading are considered.\n"
				84	"\n"
				85	"Subclasses should override.\n"
				86	);
				87
				88	static PyObject *
				89	TextIOBase_newlines_get(PyObject self, void context)
				90	{
				91	Py_RETURN_NONE;
				92	}
				93
				94
				95	static PyMethodDef TextIOBase_methods[] = {
				96	{"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
				97	{"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
				98	{"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
				99	{NULL, NULL}
				100	};
				101
				102	static PyGetSetDef TextIOBase_getset[] = {
				103	{"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
				104	{"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
				105	{0}
				106	};
				107
				108	PyTypeObject PyTextIOBase_Type = {
				109	PyVarObject_HEAD_INIT(NULL, 0)
				110	"_io._TextIOBase", /tp_name/
				111	0, /tp_basicsize/
				112	0, /tp_itemsize/
				113	0, /tp_dealloc/
				114	0, /tp_print/
				115	0, /tp_getattr/
				116	0, /tp_setattr/
				117	0, /tp_compare /
				118	0, /tp_repr/
				119	0, /tp_as_number/
				120	0, /tp_as_sequence/
				121	0, /tp_as_mapping/
				122	0, /tp_hash /
				123	0, /tp_call/
				124	0, /tp_str/
				125	0, /tp_getattro/
				126	0, /tp_setattro/
				127	0, /tp_as_buffer/
				128	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE, /tp_flags/
				129	TextIOBase_doc, /* tp_doc */
				130	0, /* tp_traverse */
				131	0, /* tp_clear */
				132	0, /* tp_richcompare */
				133	0, /* tp_weaklistoffset */
				134	0, /* tp_iter */
				135	0, /* tp_iternext */
				136	TextIOBase_methods, /* tp_methods */
				137	0, /* tp_members */
				138	TextIOBase_getset, /* tp_getset */
				139	&PyIOBase_Type, /* tp_base */
				140	0, /* tp_dict */
				141	0, /* tp_descr_get */
				142	0, /* tp_descr_set */
				143	0, /* tp_dictoffset */
				144	0, /* tp_init */
				145	0, /* tp_alloc */
				146	0, /* tp_new */
				147	};
				148
				149
				150	/* IncrementalNewlineDecoder */
				151
				152	PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
				153	"Codec used when reading a file in universal newlines mode. It wraps\n"
				154	"another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
				155	"records the types of newlines encountered. When used with\n"
				156	"translate=False, it ensures that the newline sequence is returned in\n"
				157	"one piece. When used with decoder=None, it expects unicode strings as\n"
				158	"decode input and translates newlines without first invoking an external\n"
				159	"decoder.\n"
				160	);
				161
				162	typedef struct {
				163	PyObject_HEAD
				164	PyObject *decoder;
				165	PyObject *errors;
				166	int pendingcr:1;
				167	int translate:1;
				168	unsigned int seennl:3;
				169	} PyNewLineDecoderObject;
				170
				171	static int
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	172	IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	173	PyObject args, PyObject kwds)
				174	{
				175	PyObject *decoder;
				176	int translate;
				177	PyObject *errors = NULL;
				178	char *kwlist[] = {"decoder", "translate", "errors", NULL};
				179
				180	if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi\|O:IncrementalNewlineDecoder",
				181	kwlist, &decoder, &translate, &errors))
				182	return -1;
				183
				184	self->decoder = decoder;
				185	Py_INCREF(decoder);
				186
				187	if (errors == NULL) {
				188	self->errors = PyUnicode_FromString("strict");
				189	if (self->errors == NULL)
				190	return -1;
				191	}
				192	else {
				193	Py_INCREF(errors);
				194	self->errors = errors;
				195	}
				196
				197	self->translate = translate;
				198	self->seennl = 0;
				199	self->pendingcr = 0;
				200
				201	return 0;
				202	}
				203
				204	static void
				205	IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
				206	{
				207	Py_CLEAR(self->decoder);
				208	Py_CLEAR(self->errors);
				209	Py_TYPE(self)->tp_free((PyObject *)self);
				210	}
				211
				212	#define SEEN_CR 1
				213	#define SEEN_LF 2
				214	#define SEEN_CRLF 4
				215	#define SEEN_ALL (SEEN_CR \| SEEN_LF \| SEEN_CRLF)
				216
				217	PyObject *
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	218	_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	219	PyObject *input, int final)
				220	{
				221	PyObject *output;
				222	Py_ssize_t output_len;
				223	PyNewLineDecoderObject self = (PyNewLineDecoderObject ) _self;
				224
				225	if (self->decoder == NULL) {
				226	PyErr_SetString(PyExc_ValueError,
				227	"IncrementalNewlineDecoder.__init__ not called");
				228	return NULL;
				229	}
				230
				231	/* decode input (with the eventual \r from a previous pass) */
				232	if (self->decoder != Py_None) {
				233	output = PyObject_CallMethodObjArgs(self->decoder,
				234	_PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
				235	}
				236	else {
				237	output = input;
				238	Py_INCREF(output);
				239	}
				240
				241	if (output == NULL)
				242	return NULL;
				243
				244	if (!PyUnicode_Check(output)) {
				245	PyErr_SetString(PyExc_TypeError,
				246	"decoder should return a string result");
				247	goto error;
				248	}
				249
				250	output_len = PyUnicode_GET_SIZE(output);
				251	if (self->pendingcr && (final \|\| output_len > 0)) {
				252	Py_UNICODE *out;
				253	PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
				254	if (modified == NULL)
				255	goto error;
				256	out = PyUnicode_AS_UNICODE(modified);
				257	out[0] = '\r';
				258	memcpy(out + 1, PyUnicode_AS_UNICODE(output),
				259	output_len * sizeof(Py_UNICODE));
				260	Py_DECREF(output);
				261	output = modified;
				262	self->pendingcr = 0;
				263	output_len++;
				264	}
				265
				266	/* retain last \r even when not translating data:
				267	* then readline() is sure to get \r\n in one pass
				268	*/
				269	if (!final) {
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	270	if (output_len > 0
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	271	&& PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
				272
				273	if (Py_REFCNT(output) == 1) {
				274	if (PyUnicode_Resize(&output, output_len - 1) < 0)
				275	goto error;
				276	}
				277	else {
				278	PyObject *modified = PyUnicode_FromUnicode(
				279	PyUnicode_AS_UNICODE(output),
				280	output_len - 1);
				281	if (modified == NULL)
				282	goto error;
				283	Py_DECREF(output);
				284	output = modified;
				285	}
				286	self->pendingcr = 1;
				287	}
				288	}
				289
				290	/* Record which newlines are read and do newline translation if desired,
				291	all in one pass. */
				292	{
				293	Py_UNICODE *in_str;
				294	Py_ssize_t len;
				295	int seennl = self->seennl;
				296	int only_lf = 0;
				297
				298	in_str = PyUnicode_AS_UNICODE(output);
				299	len = PyUnicode_GET_SIZE(output);
				300
				301	if (len == 0)
				302	return output;
				303
				304	/* If, up to now, newlines are consistently \n, do a quick check
				305	for the \r byte with the libc's optimized memchr.
				306	*/
				307	if (seennl == SEEN_LF \|\| seennl == 0) {
Antoine Pitrou	0e94189	2009-03-06 23:57:20 +0000	[diff] [blame]	308	only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	309	}
				310
Antoine Pitrou	66913e2	2009-03-06 23:40:56 +0000	[diff] [blame]	311	if (only_lf) {
				312	/* If not already seen, quick scan for a possible "\n" character.
				313	(there's nothing else to be done, even when in translation mode)
				314	*/
				315	if (seennl == 0 &&
				316	memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
				317	Py_UNICODE s, end;
				318	s = in_str;
				319	end = in_str + len;
				320	for (;;) {
				321	Py_UNICODE c;
				322	/* Fast loop for non-control characters */
				323	while (*s > '\n')
				324	s++;
				325	c = *s++;
				326	if (c == '\n') {
				327	seennl \|= SEEN_LF;
				328	break;
				329	}
				330	if (s > end)
				331	break;
				332	}
				333	}
				334	/* Finished: we have scanned for newlines, and none of them
				335	need translating */
				336	}
				337	else if (!self->translate) {
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	338	Py_UNICODE s, end;
Antoine Pitrou	66913e2	2009-03-06 23:40:56 +0000	[diff] [blame]	339	/* We have already seen all newline types, no need to scan again */
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	340	if (seennl == SEEN_ALL)
				341	goto endscan;
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	342	s = in_str;
				343	end = in_str + len;
				344	for (;;) {
				345	Py_UNICODE c;
				346	/* Fast loop for non-control characters */
				347	while (*s > '\r')
				348	s++;
				349	c = *s++;
				350	if (c == '\n')
				351	seennl \|= SEEN_LF;
				352	else if (c == '\r') {
				353	if (*s == '\n') {
				354	seennl \|= SEEN_CRLF;
				355	s++;
				356	}
				357	else
				358	seennl \|= SEEN_CR;
				359	}
				360	if (s > end)
				361	break;
				362	if (seennl == SEEN_ALL)
				363	break;
				364	}
				365	endscan:
				366	;
				367	}
Antoine Pitrou	66913e2	2009-03-06 23:40:56 +0000	[diff] [blame]	368	else {
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	369	PyObject *translated = NULL;
				370	Py_UNICODE *out_str;
				371	Py_UNICODE in, out, *end;
				372	if (Py_REFCNT(output) != 1) {
				373	/* We could try to optimize this so that we only do a copy
				374	when there is something to translate. On the other hand,
				375	most decoders should only output non-shared strings, i.e.
				376	translation is done in place. */
				377	translated = PyUnicode_FromUnicode(NULL, len);
				378	if (translated == NULL)
				379	goto error;
				380	assert(Py_REFCNT(translated) == 1);
				381	memcpy(PyUnicode_AS_UNICODE(translated),
				382	PyUnicode_AS_UNICODE(output),
				383	len * sizeof(Py_UNICODE));
				384	}
				385	else {
				386	translated = output;
				387	}
				388	out_str = PyUnicode_AS_UNICODE(translated);
				389	in = in_str;
				390	out = out_str;
				391	end = in_str + len;
				392	for (;;) {
				393	Py_UNICODE c;
				394	/* Fast loop for non-control characters */
				395	while ((c = *in++) > '\r')
				396	*out++ = c;
				397	if (c == '\n') {
				398	*out++ = c;
				399	seennl \|= SEEN_LF;
				400	continue;
				401	}
				402	if (c == '\r') {
				403	if (*in == '\n') {
				404	in++;
				405	seennl \|= SEEN_CRLF;
				406	}
				407	else
				408	seennl \|= SEEN_CR;
				409	*out++ = '\n';
				410	continue;
				411	}
				412	if (in > end)
				413	break;
				414	*out++ = c;
				415	}
				416	if (translated != output) {
				417	Py_DECREF(output);
				418	output = translated;
				419	}
				420	if (out - out_str != len) {
				421	if (PyUnicode_Resize(&output, out - out_str) < 0)
				422	goto error;
				423	}
				424	}
				425	self->seennl \|= seennl;
				426	}
				427
				428	return output;
				429
				430	error:
				431	Py_DECREF(output);
				432	return NULL;
				433	}
				434
				435	static PyObject *
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	436	IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	437	PyObject args, PyObject kwds)
				438	{
				439	char *kwlist[] = {"input", "final", NULL};
				440	PyObject *input;
				441	int final = 0;
				442
				443	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O\|i:IncrementalNewlineDecoder",
				444	kwlist, &input, &final))
				445	return NULL;
				446	return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
				447	}
				448
				449	static PyObject *
				450	IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject self, PyObject args)
				451	{
				452	PyObject *buffer;
				453	unsigned PY_LONG_LONG flag;
				454
				455	if (self->decoder != Py_None) {
				456	PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
				457	_PyIO_str_getstate, NULL);
				458	if (state == NULL)
				459	return NULL;
				460	if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
				461	Py_DECREF(state);
				462	return NULL;
				463	}
				464	Py_INCREF(buffer);
				465	Py_DECREF(state);
				466	}
				467	else {
				468	buffer = PyBytes_FromString("");
				469	flag = 0;
				470	}
				471	flag <<= 1;
				472	if (self->pendingcr)
				473	flag \|= 1;
				474	return Py_BuildValue("NK", buffer, flag);
				475	}
				476
				477	static PyObject *
				478	IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject self, PyObject state)
				479	{
				480	PyObject *buffer;
				481	unsigned PY_LONG_LONG flag;
				482
				483	if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
				484	return NULL;
				485
				486	self->pendingcr = (int) flag & 1;
				487	flag >>= 1;
				488
				489	if (self->decoder != Py_None)
				490	return PyObject_CallMethod(self->decoder,
				491	"setstate", "((OK))", buffer, flag);
				492	else
				493	Py_RETURN_NONE;
				494	}
				495
				496	static PyObject *
				497	IncrementalNewlineDecoder_reset(PyNewLineDecoderObject self, PyObject args)
				498	{
				499	self->seennl = 0;
				500	self->pendingcr = 0;
				501	if (self->decoder != Py_None)
				502	return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
				503	else
				504	Py_RETURN_NONE;
				505	}
				506
				507	static PyObject *
				508	IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject self, void context)
				509	{
				510	switch (self->seennl) {
				511	case SEEN_CR:
				512	return PyUnicode_FromString("\r");
				513	case SEEN_LF:
				514	return PyUnicode_FromString("\n");
				515	case SEEN_CRLF:
				516	return PyUnicode_FromString("\r\n");
				517	case SEEN_CR \| SEEN_LF:
				518	return Py_BuildValue("ss", "\r", "\n");
				519	case SEEN_CR \| SEEN_CRLF:
				520	return Py_BuildValue("ss", "\r", "\r\n");
				521	case SEEN_LF \| SEEN_CRLF:
				522	return Py_BuildValue("ss", "\n", "\r\n");
				523	case SEEN_CR \| SEEN_LF \| SEEN_CRLF:
				524	return Py_BuildValue("sss", "\r", "\n", "\r\n");
				525	default:
				526	Py_RETURN_NONE;
				527	}
				528
				529	}
				530
				531
				532	static PyMethodDef IncrementalNewlineDecoder_methods[] = {
				533	{"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS\|METH_KEYWORDS},
				534	{"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
				535	{"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
				536	{"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
				537	{0}
				538	};
				539
				540	static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
				541	{"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
				542	{0}
				543	};
				544
				545	PyTypeObject PyIncrementalNewlineDecoder_Type = {
				546	PyVarObject_HEAD_INIT(NULL, 0)
				547	"_io.IncrementalNewlineDecoder", /tp_name/
				548	sizeof(PyNewLineDecoderObject), /tp_basicsize/
				549	0, /tp_itemsize/
				550	(destructor)IncrementalNewlineDecoder_dealloc, /tp_dealloc/
				551	0, /tp_print/
				552	0, /tp_getattr/
				553	0, /tp_setattr/
				554	0, /tp_compare /
				555	0, /tp_repr/
				556	0, /tp_as_number/
				557	0, /tp_as_sequence/
				558	0, /tp_as_mapping/
				559	0, /tp_hash /
				560	0, /tp_call/
				561	0, /tp_str/
				562	0, /tp_getattro/
				563	0, /tp_setattro/
				564	0, /tp_as_buffer/
				565	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE, /tp_flags/
				566	IncrementalNewlineDecoder_doc, /* tp_doc */
				567	0, /* tp_traverse */
				568	0, /* tp_clear */
				569	0, /* tp_richcompare */
				570	0, /tp_weaklistoffset/
				571	0, /* tp_iter */
				572	0, /* tp_iternext */
				573	IncrementalNewlineDecoder_methods, /* tp_methods */
				574	0, /* tp_members */
				575	IncrementalNewlineDecoder_getset, /* tp_getset */
				576	0, /* tp_base */
				577	0, /* tp_dict */
				578	0, /* tp_descr_get */
				579	0, /* tp_descr_set */
				580	0, /* tp_dictoffset */
				581	(initproc)IncrementalNewlineDecoder_init, /* tp_init */
				582	0, /* tp_alloc */
				583	PyType_GenericNew, /* tp_new */
				584	};
				585
				586
				587	/* TextIOWrapper */
				588
				589	PyDoc_STRVAR(TextIOWrapper_doc,
				590	"Character and line based layer over a BufferedIOBase object, buffer.\n"
				591	"\n"
				592	"encoding gives the name of the encoding that the stream will be\n"
				593	"decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
				594	"\n"
				595	"errors determines the strictness of encoding and decoding (see the\n"
				596	"codecs.register) and defaults to \"strict\".\n"
				597	"\n"
				598	"newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
				599	"handling of line endings. If it is None, universal newlines is\n"
				600	"enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
				601	"or '\\r\\n' are translated to '\\n' before being returned to the\n"
				602	"caller. Conversely, on output, '\\n' is translated to the system\n"
				603	"default line seperator, os.linesep. If newline is any other of its\n"
				604	"legal values, that newline becomes the newline when the file is read\n"
				605	"and it is returned untranslated. On output, '\\n' is converted to the\n"
				606	"newline.\n"
				607	"\n"
				608	"If line_buffering is True, a call to flush is implied when a call to\n"
				609	"write contains a newline character."
				610	);
				611
				612	typedef PyObject *
				613	(encodefunc_t)(PyObject , PyObject *);
				614
				615	typedef struct
				616	{
				617	PyObject_HEAD
				618	int ok; /* initialized? */
				619	Py_ssize_t chunk_size;
				620	PyObject *buffer;
				621	PyObject *encoding;
				622	PyObject *encoder;
				623	PyObject *decoder;
				624	PyObject *readnl;
				625	PyObject *errors;
				626	const char writenl; / utf-8 encoded, NULL stands for \n */
				627	char line_buffering;
				628	char readuniversal;
				629	char readtranslate;
				630	char writetranslate;
				631	char seekable;
				632	char telling;
				633	/* Specialized encoding func (see below) */
				634	encodefunc_t encodefunc;
				635
				636	/* Reads and writes are internally buffered in order to speed things up.
				637	However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	638
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	639	Please also note that text to be written is first encoded before being
				640	buffered. This is necessary so that encoding errors are immediately
				641	reported to the caller, but it unfortunately means that the
				642	IncrementalEncoder (whose encode() method is always written in Python)
				643	becomes a bottleneck for small writes.
				644	*/
				645	PyObject decoded_chars; / buffer for text returned from decoder */
				646	Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
				647	PyObject pending_bytes; / list of bytes objects waiting to be
				648	written, or NULL */
				649	Py_ssize_t pending_bytes_count;
				650	PyObject *snapshot;
				651	/* snapshot is either None, or a tuple (dec_flags, next_input) where
				652	* dec_flags is the second (integer) item of the decoder state and
				653	* next_input is the chunk of input bytes that comes next after the
				654	* snapshot point. We use this to reconstruct decoder states in tell().
				655	*/
				656
				657	/* Cache raw object if it's a FileIO object */
				658	PyObject *raw;
				659
				660	PyObject *weakreflist;
				661	PyObject *dict;
				662	} PyTextIOWrapperObject;
				663
				664
				665	/* A couple of specialized cases in order to bypass the slow incremental
				666	encoding methods for the most popular encodings. */
				667
				668	static PyObject *
				669	ascii_encode(PyTextIOWrapperObject self, PyObject text)
				670	{
				671	return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
				672	PyUnicode_GET_SIZE(text),
				673	PyBytes_AS_STRING(self->errors));
				674	}
				675
				676	static PyObject *
				677	utf16be_encode(PyTextIOWrapperObject self, PyObject text)
				678	{
				679	return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
				680	PyUnicode_GET_SIZE(text),
				681	PyBytes_AS_STRING(self->errors), 1);
				682	}
				683
				684	static PyObject *
				685	utf16le_encode(PyTextIOWrapperObject self, PyObject text)
				686	{
				687	return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
				688	PyUnicode_GET_SIZE(text),
				689	PyBytes_AS_STRING(self->errors), -1);
				690	}
				691
				692	static PyObject *
				693	utf16_encode(PyTextIOWrapperObject self, PyObject text)
				694	{
				695	PyObject *res;
				696	res = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
				697	PyUnicode_GET_SIZE(text),
				698	PyBytes_AS_STRING(self->errors), 0);
				699	if (res == NULL)
				700	return NULL;
				701	/* Next writes will skip the BOM and use native byte ordering */
				702	#if defined(WORDS_BIGENDIAN)
				703	self->encodefunc = (encodefunc_t) utf16be_encode;
				704	#else
				705	self->encodefunc = (encodefunc_t) utf16le_encode;
				706	#endif
				707	return res;
				708	}
				709
				710
				711	static PyObject *
				712	utf8_encode(PyTextIOWrapperObject self, PyObject text)
				713	{
				714	return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
				715	PyUnicode_GET_SIZE(text),
				716	PyBytes_AS_STRING(self->errors));
				717	}
				718
				719	static PyObject *
				720	latin1_encode(PyTextIOWrapperObject self, PyObject text)
				721	{
				722	return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
				723	PyUnicode_GET_SIZE(text),
				724	PyBytes_AS_STRING(self->errors));
				725	}
				726
				727	/* Map normalized encoding names onto the specialized encoding funcs */
				728
				729	typedef struct {
				730	const char *name;
				731	encodefunc_t encodefunc;
				732	} encodefuncentry;
				733
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	734	static encodefuncentry encodefuncs[] = {
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	735	{"ascii", (encodefunc_t) ascii_encode},
				736	{"iso8859-1", (encodefunc_t) latin1_encode},
				737	{"utf-16-be", (encodefunc_t) utf16be_encode},
				738	{"utf-16-le", (encodefunc_t) utf16le_encode},
				739	{"utf-16", (encodefunc_t) utf16_encode},
				740	{"utf-8", (encodefunc_t) utf8_encode},
				741	{NULL, NULL}
				742	};
				743
				744
				745	static int
				746	TextIOWrapper_init(PyTextIOWrapperObject self, PyObject args, PyObject *kwds)
				747	{
				748	char *kwlist[] = {"buffer", "encoding", "errors",
				749	"newline", "line_buffering",
				750	NULL};
				751	PyObject buffer, raw;
				752	char *encoding = NULL;
				753	char *errors = NULL;
				754	char *newline = NULL;
				755	int line_buffering = 0;
				756	_PyIO_State *state = IO_STATE;
				757
				758	PyObject *res;
				759	int r;
				760
				761	self->ok = 0;
				762	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O\|zzzi:fileio",
				763	kwlist, &buffer, &encoding, &errors,
				764	&newline, &line_buffering))
				765	return -1;
				766
				767	if (newline && newline[0] != '\0'
				768	&& !(newline[0] == '\n' && newline[1] == '\0')
				769	&& !(newline[0] == '\r' && newline[1] == '\0')
				770	&& !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
				771	PyErr_Format(PyExc_ValueError,
				772	"illegal newline value: %s", newline);
				773	return -1;
				774	}
				775
				776	Py_CLEAR(self->buffer);
				777	Py_CLEAR(self->encoding);
				778	Py_CLEAR(self->encoder);
				779	Py_CLEAR(self->decoder);
				780	Py_CLEAR(self->readnl);
				781	Py_CLEAR(self->decoded_chars);
				782	Py_CLEAR(self->pending_bytes);
				783	Py_CLEAR(self->snapshot);
				784	Py_CLEAR(self->errors);
				785	Py_CLEAR(self->raw);
				786	self->decoded_chars_used = 0;
				787	self->pending_bytes_count = 0;
				788	self->encodefunc = NULL;
				789
				790	if (encoding == NULL) {
				791	/* Try os.device_encoding(fileno) */
				792	PyObject *fileno;
				793	fileno = PyObject_CallMethod(buffer, "fileno", NULL);
				794	/* Ignore only AttributeError and UnsupportedOperation */
				795	if (fileno == NULL) {
				796	if (PyErr_ExceptionMatches(PyExc_AttributeError) \|\|
				797	PyErr_ExceptionMatches(state->unsupported_operation)) {
				798	PyErr_Clear();
				799	}
				800	else {
				801	goto error;
				802	}
				803	}
				804	else {
				805	self->encoding = PyObject_CallMethod(state->os_module,
				806	"device_encoding",
				807	"N", fileno);
				808	if (self->encoding == NULL)
				809	goto error;
				810	else if (!PyUnicode_Check(self->encoding))
				811	Py_CLEAR(self->encoding);
				812	}
				813	}
				814	if (encoding == NULL && self->encoding == NULL) {
				815	if (state->locale_module == NULL) {
				816	state->locale_module = PyImport_ImportModule("locale");
				817	if (state->locale_module == NULL)
				818	goto catch_ImportError;
				819	else
				820	goto use_locale;
				821	}
				822	else {
				823	use_locale:
				824	self->encoding = PyObject_CallMethod(
				825	state->locale_module, "getpreferredencoding", NULL);
				826	if (self->encoding == NULL) {
				827	catch_ImportError:
				828	/*
				829	Importing locale can raise a ImportError because of
				830	_functools, and locale.getpreferredencoding can raise a
				831	ImportError if _locale is not available. These will happen
				832	during module building.
				833	*/
				834	if (PyErr_ExceptionMatches(PyExc_ImportError)) {
				835	PyErr_Clear();
				836	self->encoding = PyUnicode_FromString("ascii");
				837	}
				838	else
				839	goto error;
				840	}
				841	else if (!PyUnicode_Check(self->encoding))
				842	Py_CLEAR(self->encoding);
				843	}
				844	}
				845	if (self->encoding != NULL)
				846	encoding = _PyUnicode_AsString(self->encoding);
				847	else if (encoding != NULL) {
				848	self->encoding = PyUnicode_FromString(encoding);
				849	if (self->encoding == NULL)
				850	goto error;
				851	}
				852	else {
				853	PyErr_SetString(PyExc_IOError,
				854	"could not determine default encoding");
				855	}
				856
				857	if (errors == NULL)
				858	errors = "strict";
				859	self->errors = PyBytes_FromString(errors);
				860	if (self->errors == NULL)
				861	goto error;
				862
				863	self->chunk_size = 8192;
				864	self->readuniversal = (newline == NULL \|\| newline[0] == '\0');
				865	self->line_buffering = line_buffering;
				866	self->readtranslate = (newline == NULL);
				867	if (newline) {
				868	self->readnl = PyUnicode_FromString(newline);
				869	if (self->readnl == NULL)
				870	return -1;
				871	}
				872	self->writetranslate = (newline == NULL \|\| newline[0] != '\0');
				873	if (!self->readuniversal && self->readnl) {
				874	self->writenl = _PyUnicode_AsString(self->readnl);
				875	if (!strcmp(self->writenl, "\n"))
				876	self->writenl = NULL;
				877	}
				878	#ifdef MS_WINDOWS
				879	else
				880	self->writenl = "\r\n";
				881	#endif
				882
				883	/* Build the decoder object */
				884	res = PyObject_CallMethod(buffer, "readable", NULL);
				885	if (res == NULL)
				886	goto error;
				887	r = PyObject_IsTrue(res);
				888	Py_DECREF(res);
				889	if (r == -1)
				890	goto error;
				891	if (r == 1) {
				892	self->decoder = PyCodec_IncrementalDecoder(
				893	encoding, errors);
				894	if (self->decoder == NULL)
				895	goto error;
				896
				897	if (self->readuniversal) {
				898	PyObject *incrementalDecoder = PyObject_CallFunction(
				899	(PyObject *)&PyIncrementalNewlineDecoder_Type,
				900	"Oi", self->decoder, (int)self->readtranslate);
				901	if (incrementalDecoder == NULL)
				902	goto error;
				903	Py_CLEAR(self->decoder);
				904	self->decoder = incrementalDecoder;
				905	}
				906	}
				907
				908	/* Build the encoder object */
				909	res = PyObject_CallMethod(buffer, "writable", NULL);
				910	if (res == NULL)
				911	goto error;
				912	r = PyObject_IsTrue(res);
				913	Py_DECREF(res);
				914	if (r == -1)
				915	goto error;
				916	if (r == 1) {
				917	PyObject *ci;
				918	self->encoder = PyCodec_IncrementalEncoder(
				919	encoding, errors);
				920	if (self->encoder == NULL)
				921	goto error;
				922	/* Get the normalized named of the codec */
				923	ci = _PyCodec_Lookup(encoding);
				924	if (ci == NULL)
				925	goto error;
				926	res = PyObject_GetAttrString(ci, "name");
				927	Py_DECREF(ci);
				928	if (res == NULL)
				929	PyErr_Clear();
				930	else if (PyUnicode_Check(res)) {
				931	encodefuncentry *e = encodefuncs;
				932	while (e->name != NULL) {
				933	if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
				934	self->encodefunc = e->encodefunc;
				935	break;
				936	}
				937	e++;
				938	}
				939	}
				940	Py_XDECREF(res);
				941	}
				942
				943	self->buffer = buffer;
				944	Py_INCREF(buffer);
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	945
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	946	if (Py_TYPE(buffer) == &PyBufferedReader_Type \|\|
				947	Py_TYPE(buffer) == &PyBufferedWriter_Type \|\|
				948	Py_TYPE(buffer) == &PyBufferedRandom_Type) {
				949	raw = PyObject_GetAttrString(buffer, "raw");
				950	/* Cache the raw FileIO object to speed up 'closed' checks */
				951	if (raw == NULL)
				952	PyErr_Clear();
				953	else if (Py_TYPE(raw) == &PyFileIO_Type)
				954	self->raw = raw;
				955	else
				956	Py_DECREF(raw);
				957	}
				958
				959	res = PyObject_CallMethod(buffer, "seekable", NULL);
				960	if (res == NULL)
				961	goto error;
				962	self->seekable = self->telling = PyObject_IsTrue(res);
				963	Py_DECREF(res);
				964
				965	self->ok = 1;
				966	return 0;
				967
				968	error:
				969	return -1;
				970	}
				971
				972	static int
				973	_TextIOWrapper_clear(PyTextIOWrapperObject *self)
				974	{
				975	if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
				976	return -1;
				977	self->ok = 0;
				978	Py_CLEAR(self->buffer);
				979	Py_CLEAR(self->encoding);
				980	Py_CLEAR(self->encoder);
				981	Py_CLEAR(self->decoder);
				982	Py_CLEAR(self->readnl);
				983	Py_CLEAR(self->decoded_chars);
				984	Py_CLEAR(self->pending_bytes);
				985	Py_CLEAR(self->snapshot);
				986	Py_CLEAR(self->errors);
				987	Py_CLEAR(self->raw);
				988	return 0;
				989	}
				990
				991	static void
				992	TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
				993	{
				994	if (_TextIOWrapper_clear(self) < 0)
				995	return;
				996	_PyObject_GC_UNTRACK(self);
				997	if (self->weakreflist != NULL)
				998	PyObject_ClearWeakRefs((PyObject *)self);
				999	Py_CLEAR(self->dict);
				1000	Py_TYPE(self)->tp_free((PyObject *)self);
				1001	}
				1002
				1003	static int
				1004	TextIOWrapper_traverse(PyTextIOWrapperObject self, visitproc visit, void arg)
				1005	{
				1006	Py_VISIT(self->buffer);
				1007	Py_VISIT(self->encoding);
				1008	Py_VISIT(self->encoder);
				1009	Py_VISIT(self->decoder);
				1010	Py_VISIT(self->readnl);
				1011	Py_VISIT(self->decoded_chars);
				1012	Py_VISIT(self->pending_bytes);
				1013	Py_VISIT(self->snapshot);
				1014	Py_VISIT(self->errors);
				1015	Py_VISIT(self->raw);
				1016
				1017	Py_VISIT(self->dict);
				1018	return 0;
				1019	}
				1020
				1021	static int
				1022	TextIOWrapper_clear(PyTextIOWrapperObject *self)
				1023	{
				1024	if (_TextIOWrapper_clear(self) < 0)
				1025	return -1;
				1026	Py_CLEAR(self->dict);
				1027	return 0;
				1028	}
				1029
				1030	static PyObject *
				1031	TextIOWrapper_closed_get(PyTextIOWrapperObject self, void context);
				1032
				1033	/* This macro takes some shortcuts to make the common case faster. */
				1034	#define CHECK_CLOSED(self) \
				1035	do { \
				1036	int r; \
				1037	PyObject *_res; \
				1038	if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
				1039	if (self->raw != NULL) \
				1040	r = _PyFileIO_closed(self->raw); \
				1041	else { \
				1042	_res = TextIOWrapper_closed_get(self, NULL); \
				1043	if (_res == NULL) \
				1044	return NULL; \
				1045	r = PyObject_IsTrue(_res); \
				1046	Py_DECREF(_res); \
				1047	if (r < 0) \
				1048	return NULL; \
				1049	} \
				1050	if (r > 0) { \
				1051	PyErr_SetString(PyExc_ValueError, \
				1052	"I/O operation on closed file."); \
				1053	return NULL; \
				1054	} \
				1055	} \
				1056	else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
				1057	return NULL; \
				1058	} while (0)
				1059
				1060	#define CHECK_INITIALIZED(self) \
				1061	if (self->ok <= 0) { \
				1062	PyErr_SetString(PyExc_ValueError, \
				1063	"I/O operation on uninitialized object"); \
				1064	return NULL; \
				1065	}
				1066
				1067	#define CHECK_INITIALIZED_INT(self) \
				1068	if (self->ok <= 0) { \
				1069	PyErr_SetString(PyExc_ValueError, \
				1070	"I/O operation on uninitialized object"); \
				1071	return -1; \
				1072	}
				1073
				1074
				1075	Py_LOCAL_INLINE(const Py_UNICODE *)
				1076	findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
				1077	{
				1078	/* like wcschr, but doesn't stop at NULL characters */
				1079	while (size-- > 0) {
				1080	if (*s == ch)
				1081	return s;
				1082	s++;
				1083	}
				1084	return NULL;
				1085	}
				1086
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	1087	/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1088	underlying buffered object, though. */
				1089	static int
				1090	_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
				1091	{
				1092	PyObject b, ret;
				1093
				1094	if (self->pending_bytes == NULL)
				1095	return 0;
				1096	b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
				1097	if (b == NULL)
				1098	return -1;
				1099	ret = PyObject_CallMethodObjArgs(self->buffer,
				1100	_PyIO_str_write, b, NULL);
				1101	Py_DECREF(b);
				1102	if (ret == NULL)
				1103	return -1;
				1104	Py_DECREF(ret);
				1105	Py_CLEAR(self->pending_bytes);
				1106	self->pending_bytes_count = 0;
				1107	return 0;
				1108	}
				1109
				1110	static PyObject *
				1111	TextIOWrapper_write(PyTextIOWrapperObject self, PyObject args)
				1112	{
				1113	PyObject *ret;
				1114	PyObject text; / owned reference */
				1115	PyObject *b;
				1116	Py_ssize_t textlen;
				1117	int haslf = 0;
				1118	int needflush = 0;
				1119
				1120	CHECK_INITIALIZED(self);
				1121
				1122	if (!PyArg_ParseTuple(args, "U:write", &text)) {
				1123	return NULL;
				1124	}
				1125
				1126	CHECK_CLOSED(self);
				1127
				1128	Py_INCREF(text);
				1129
				1130	textlen = PyUnicode_GetSize(text);
				1131
				1132	if ((self->writetranslate && self->writenl != NULL) \|\| self->line_buffering)
				1133	if (findchar(PyUnicode_AS_UNICODE(text),
				1134	PyUnicode_GET_SIZE(text), '\n'))
				1135	haslf = 1;
				1136
				1137	if (haslf && self->writetranslate && self->writenl != NULL) {
				1138	PyObject *newtext = PyObject_CallMethod(
				1139	text, "replace", "ss", "\n", self->writenl);
				1140	Py_DECREF(text);
				1141	if (newtext == NULL)
				1142	return NULL;
				1143	text = newtext;
				1144	}
				1145
				1146	if (self->line_buffering &&
				1147	(haslf \|\|
				1148	findchar(PyUnicode_AS_UNICODE(text),
				1149	PyUnicode_GET_SIZE(text), '\r')))
				1150	needflush = 1;
				1151
				1152	/* XXX What if we were just reading? */
				1153	if (self->encodefunc != NULL)
				1154	b = (self->encodefunc)((PyObject ) self, text);
				1155	else
				1156	b = PyObject_CallMethodObjArgs(self->encoder,
				1157	_PyIO_str_encode, text, NULL);
				1158	Py_DECREF(text);
				1159	if (b == NULL)
				1160	return NULL;
				1161
				1162	if (self->pending_bytes == NULL) {
				1163	self->pending_bytes = PyList_New(0);
				1164	if (self->pending_bytes == NULL) {
				1165	Py_DECREF(b);
				1166	return NULL;
				1167	}
				1168	self->pending_bytes_count = 0;
				1169	}
				1170	if (PyList_Append(self->pending_bytes, b) < 0) {
				1171	Py_DECREF(b);
				1172	return NULL;
				1173	}
				1174	self->pending_bytes_count += PyBytes_GET_SIZE(b);
				1175	Py_DECREF(b);
				1176	if (self->pending_bytes_count > self->chunk_size \|\| needflush) {
				1177	if (_TextIOWrapper_writeflush(self) < 0)
				1178	return NULL;
				1179	}
Antoine Pitrou	24f3629	2009-03-28 22:16:42 +0000	[diff] [blame]	1180
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1181	if (needflush) {
				1182	ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
				1183	if (ret == NULL)
				1184	return NULL;
				1185	Py_DECREF(ret);
				1186	}
				1187
				1188	Py_CLEAR(self->snapshot);
				1189
				1190	if (self->decoder) {
				1191	ret = PyObject_CallMethod(self->decoder, "reset", NULL);
				1192	if (ret == NULL)
				1193	return NULL;
				1194	Py_DECREF(ret);
				1195	}
				1196
				1197	return PyLong_FromSsize_t(textlen);
				1198	}
				1199
				1200	/* Steal a reference to chars and store it in the decoded_char buffer;
				1201	*/
				1202	static void
				1203	TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject self, PyObject chars)
				1204	{
				1205	Py_CLEAR(self->decoded_chars);
				1206	self->decoded_chars = chars;
				1207	self->decoded_chars_used = 0;
				1208	}
				1209
				1210	static PyObject *
				1211	TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
				1212	{
				1213	PyObject *chars;
				1214	Py_ssize_t avail;
				1215
				1216	if (self->decoded_chars == NULL)
				1217	return PyUnicode_FromStringAndSize(NULL, 0);
				1218
				1219	avail = (PyUnicode_GET_SIZE(self->decoded_chars)
				1220	- self->decoded_chars_used);
				1221
				1222	assert(avail >= 0);
				1223
				1224	if (n < 0 \|\| n > avail)
				1225	n = avail;
				1226
				1227	if (self->decoded_chars_used > 0 \|\| n < avail) {
				1228	chars = PyUnicode_FromUnicode(
				1229	PyUnicode_AS_UNICODE(self->decoded_chars)
				1230	+ self->decoded_chars_used, n);
				1231	if (chars == NULL)
				1232	return NULL;
				1233	}
				1234	else {
				1235	chars = self->decoded_chars;
				1236	Py_INCREF(chars);
				1237	}
				1238
				1239	self->decoded_chars_used += n;
				1240	return chars;
				1241	}
				1242
				1243	/* Read and decode the next chunk of data from the BufferedReader.
				1244	*/
				1245	static int
				1246	TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
				1247	{
				1248	PyObject *dec_buffer = NULL;
				1249	PyObject *dec_flags = NULL;
				1250	PyObject *input_chunk = NULL;
				1251	PyObject decoded_chars, chunk_size;
				1252	int eof;
				1253
				1254	/* The return value is True unless EOF was reached. The decoded string is
				1255	* placed in self._decoded_chars (replacing its previous value). The
				1256	* entire input chunk is sent to the decoder, though some of it may remain
				1257	* buffered in the decoder, yet to be converted.
				1258	*/
				1259
				1260	if (self->decoder == NULL) {
				1261	PyErr_SetString(PyExc_ValueError, "no decoder");
				1262	return -1;
				1263	}
				1264
				1265	if (self->telling) {
				1266	/* To prepare for tell(), we need to snapshot a point in the file
				1267	* where the decoder's input buffer is empty.
				1268	*/
				1269
				1270	PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
				1271	_PyIO_str_getstate, NULL);
				1272	if (state == NULL)
				1273	return -1;
				1274	/* Given this, we know there was a valid snapshot point
				1275	* len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
				1276	*/
				1277	if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
				1278	Py_DECREF(state);
				1279	return -1;
				1280	}
				1281	Py_INCREF(dec_buffer);
				1282	Py_INCREF(dec_flags);
				1283	Py_DECREF(state);
				1284	}
				1285
				1286	/* Read a chunk, decode it, and put the result in self._decoded_chars. */
				1287	chunk_size = PyLong_FromSsize_t(self->chunk_size);
				1288	if (chunk_size == NULL)
				1289	goto fail;
				1290	input_chunk = PyObject_CallMethodObjArgs(self->buffer,
				1291	_PyIO_str_read1, chunk_size, NULL);
				1292	Py_DECREF(chunk_size);
				1293	if (input_chunk == NULL)
				1294	goto fail;
				1295	assert(PyBytes_Check(input_chunk));
				1296
				1297	eof = (PyBytes_Size(input_chunk) == 0);
				1298
				1299	if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
				1300	decoded_chars = _PyIncrementalNewlineDecoder_decode(
				1301	self->decoder, input_chunk, eof);
				1302	}
				1303	else {
				1304	decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
				1305	_PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
				1306	}
				1307
				1308	/* TODO sanity check: isinstance(decoded_chars, unicode) */
				1309	if (decoded_chars == NULL)
				1310	goto fail;
				1311	TextIOWrapper_set_decoded_chars(self, decoded_chars);
				1312	if (PyUnicode_GET_SIZE(decoded_chars) > 0)
				1313	eof = 0;
				1314
				1315	if (self->telling) {
				1316	/* At the snapshot point, len(dec_buffer) bytes before the read, the
				1317	* next input to be decoded is dec_buffer + input_chunk.
				1318	*/
				1319	PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
				1320	if (next_input == NULL)
				1321	goto fail;
				1322	assert (PyBytes_Check(next_input));
				1323	Py_DECREF(dec_buffer);
				1324	Py_CLEAR(self->snapshot);
				1325	self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
				1326	}
				1327	Py_DECREF(input_chunk);
				1328
				1329	return (eof == 0);
				1330
				1331	fail:
				1332	Py_XDECREF(dec_buffer);
				1333	Py_XDECREF(dec_flags);
				1334	Py_XDECREF(input_chunk);
				1335	return -1;
				1336	}
				1337
				1338	static PyObject *
				1339	TextIOWrapper_read(PyTextIOWrapperObject self, PyObject args)
				1340	{
				1341	Py_ssize_t n = -1;
				1342	PyObject result = NULL, chunks = NULL;
				1343
				1344	CHECK_INITIALIZED(self);
				1345
				1346	if (!PyArg_ParseTuple(args, "\|n:read", &n))
				1347	return NULL;
				1348
				1349	CHECK_CLOSED(self);
				1350
Benjamin Peterson	a1b4901	2009-03-31 23:11:32 +0000	[diff] [blame]	1351	if (self->decoder == NULL) {
				1352	PyErr_SetString(PyExc_IOError, "not readable");
				1353	return NULL;
				1354	}
				1355
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1356	if (_TextIOWrapper_writeflush(self) < 0)
				1357	return NULL;
				1358
				1359	if (n < 0) {
				1360	/* Read everything */
				1361	PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
				1362	PyObject *decoded;
				1363	if (bytes == NULL)
				1364	goto fail;
				1365	decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
				1366	bytes, Py_True, NULL);
				1367	Py_DECREF(bytes);
				1368	if (decoded == NULL)
				1369	goto fail;
				1370
				1371	result = TextIOWrapper_get_decoded_chars(self, -1);
				1372
				1373	if (result == NULL) {
				1374	Py_DECREF(decoded);
				1375	return NULL;
				1376	}
				1377
				1378	PyUnicode_AppendAndDel(&result, decoded);
				1379	if (result == NULL)
				1380	goto fail;
				1381
				1382	Py_CLEAR(self->snapshot);
				1383	return result;
				1384	}
				1385	else {
				1386	int res = 1;
				1387	Py_ssize_t remaining = n;
				1388
				1389	result = TextIOWrapper_get_decoded_chars(self, n);
				1390	if (result == NULL)
				1391	goto fail;
				1392	remaining -= PyUnicode_GET_SIZE(result);
				1393
				1394	/* Keep reading chunks until we have n characters to return */
				1395	while (remaining > 0) {
				1396	res = TextIOWrapper_read_chunk(self);
				1397	if (res < 0)
				1398	goto fail;
				1399	if (res == 0) /* EOF */
				1400	break;
				1401	if (chunks == NULL) {
				1402	chunks = PyList_New(0);
				1403	if (chunks == NULL)
				1404	goto fail;
				1405	}
				1406	if (PyList_Append(chunks, result) < 0)
				1407	goto fail;
				1408	Py_DECREF(result);
				1409	result = TextIOWrapper_get_decoded_chars(self, remaining);
				1410	if (result == NULL)
				1411	goto fail;
				1412	remaining -= PyUnicode_GET_SIZE(result);
				1413	}
				1414	if (chunks != NULL) {
				1415	if (result != NULL && PyList_Append(chunks, result) < 0)
				1416	goto fail;
				1417	Py_CLEAR(result);
				1418	result = PyUnicode_Join(_PyIO_empty_str, chunks);
				1419	if (result == NULL)
				1420	goto fail;
				1421	Py_CLEAR(chunks);
				1422	}
				1423	return result;
				1424	}
				1425	fail:
				1426	Py_XDECREF(result);
				1427	Py_XDECREF(chunks);
				1428	return NULL;
				1429	}
				1430
				1431
				1432	/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
				1433	that is to the NUL character. Otherwise the function will produce
				1434	incorrect results. */
				1435	static Py_UNICODE *
				1436	find_control_char(Py_UNICODE start, Py_UNICODE end, Py_UNICODE ch)
				1437	{
				1438	Py_UNICODE *s = start;
				1439	for (;;) {
				1440	while (*s > ch)
				1441	s++;
				1442	if (*s == ch)
				1443	return s;
				1444	if (s == end)
				1445	return NULL;
				1446	s++;
				1447	}
				1448	}
				1449
				1450	Py_ssize_t
				1451	_PyIO_find_line_ending(
				1452	int translated, int universal, PyObject *readnl,
				1453	Py_UNICODE start, Py_UNICODE end, Py_ssize_t *consumed)
				1454	{
				1455	Py_ssize_t len = end - start;
				1456
				1457	if (translated) {
				1458	/* Newlines are already translated, only search for \n */
				1459	Py_UNICODE *pos = find_control_char(start, end, '\n');
				1460	if (pos != NULL)
				1461	return pos - start + 1;
				1462	else {
				1463	*consumed = len;
				1464	return -1;
				1465	}
				1466	}
				1467	else if (universal) {
				1468	/* Universal newline search. Find any of \r, \r\n, \n
				1469	* The decoder ensures that \r\n are not split in two pieces
				1470	*/
				1471	Py_UNICODE *s = start;
				1472	for (;;) {
				1473	Py_UNICODE ch;
				1474	/* Fast path for non-control chars. The loop always ends
				1475	since the Py_UNICODE storage is NUL-terminated. */
				1476	while (*s > '\r')
				1477	s++;
				1478	if (s >= end) {
				1479	*consumed = len;
				1480	return -1;
				1481	}
				1482	ch = *s++;
				1483	if (ch == '\n')
				1484	return s - start;
				1485	if (ch == '\r') {
				1486	if (*s == '\n')
				1487	return s - start + 1;
				1488	else
				1489	return s - start;
				1490	}
				1491	}
				1492	}
				1493	else {
				1494	/* Non-universal mode. */
				1495	Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
				1496	Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
				1497	if (readnl_len == 1) {
				1498	Py_UNICODE *pos = find_control_char(start, end, nl[0]);
				1499	if (pos != NULL)
				1500	return pos - start + 1;
				1501	*consumed = len;
				1502	return -1;
				1503	}
				1504	else {
				1505	Py_UNICODE *s = start;
				1506	Py_UNICODE *e = end - readnl_len + 1;
				1507	Py_UNICODE *pos;
				1508	if (e < s)
				1509	e = s;
				1510	while (s < e) {
				1511	Py_ssize_t i;
				1512	Py_UNICODE *pos = find_control_char(s, end, nl[0]);
				1513	if (pos == NULL \|\| pos >= e)
				1514	break;
				1515	for (i = 1; i < readnl_len; i++) {
				1516	if (pos[i] != nl[i])
				1517	break;
				1518	}
				1519	if (i == readnl_len)
				1520	return pos - start + readnl_len;
				1521	s = pos + 1;
				1522	}
				1523	pos = find_control_char(e, end, nl[0]);
				1524	if (pos == NULL)
				1525	*consumed = len;
				1526	else
				1527	*consumed = pos - start;
				1528	return -1;
				1529	}
				1530	}
				1531	}
				1532
				1533	static PyObject *
				1534	_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
				1535	{
				1536	PyObject line = NULL, chunks = NULL, *remaining = NULL;
				1537	Py_ssize_t start, endpos, chunked, offset_to_buffer;
				1538	int res;
				1539
				1540	CHECK_CLOSED(self);
				1541
				1542	if (_TextIOWrapper_writeflush(self) < 0)
				1543	return NULL;
				1544
				1545	chunked = 0;
				1546
				1547	while (1) {
				1548	Py_UNICODE *ptr;
				1549	Py_ssize_t line_len;
				1550	Py_ssize_t consumed = 0;
				1551
				1552	/* First, get some data if necessary */
				1553	res = 1;
				1554	while (!self->decoded_chars \|\|
				1555	!PyUnicode_GET_SIZE(self->decoded_chars)) {
				1556	res = TextIOWrapper_read_chunk(self);
				1557	if (res < 0)
				1558	goto error;
				1559	if (res == 0)
				1560	break;
				1561	}
				1562	if (res == 0) {
				1563	/* end of file */
				1564	TextIOWrapper_set_decoded_chars(self, NULL);
				1565	Py_CLEAR(self->snapshot);
				1566	start = endpos = offset_to_buffer = 0;
				1567	break;
				1568	}
				1569
				1570	if (remaining == NULL) {
				1571	line = self->decoded_chars;
				1572	start = self->decoded_chars_used;
				1573	offset_to_buffer = 0;
				1574	Py_INCREF(line);
				1575	}
				1576	else {
				1577	assert(self->decoded_chars_used == 0);
				1578	line = PyUnicode_Concat(remaining, self->decoded_chars);
				1579	start = 0;
				1580	offset_to_buffer = PyUnicode_GET_SIZE(remaining);
				1581	Py_CLEAR(remaining);
				1582	if (line == NULL)
				1583	goto error;
				1584	}
				1585
				1586	ptr = PyUnicode_AS_UNICODE(line);
				1587	line_len = PyUnicode_GET_SIZE(line);
				1588
				1589	endpos = _PyIO_find_line_ending(
				1590	self->readtranslate, self->readuniversal, self->readnl,
				1591	ptr + start, ptr + line_len, &consumed);
				1592	if (endpos >= 0) {
				1593	endpos += start;
				1594	if (limit >= 0 && (endpos - start) + chunked >= limit)
				1595	endpos = start + limit - chunked;
				1596	break;
				1597	}
				1598
				1599	/* We can put aside up to `endpos` */
				1600	endpos = consumed + start;
				1601	if (limit >= 0 && (endpos - start) + chunked >= limit) {
				1602	/* Didn't find line ending, but reached length limit */
				1603	endpos = start + limit - chunked;
				1604	break;
				1605	}
				1606
				1607	if (endpos > start) {
				1608	/* No line ending seen yet - put aside current data */
				1609	PyObject *s;
				1610	if (chunks == NULL) {
				1611	chunks = PyList_New(0);
				1612	if (chunks == NULL)
				1613	goto error;
				1614	}
				1615	s = PyUnicode_FromUnicode(ptr + start, endpos - start);
				1616	if (s == NULL)
				1617	goto error;
				1618	if (PyList_Append(chunks, s) < 0) {
				1619	Py_DECREF(s);
				1620	goto error;
				1621	}
				1622	chunked += PyUnicode_GET_SIZE(s);
				1623	Py_DECREF(s);
				1624	}
				1625	/* There may be some remaining bytes we'll have to prepend to the
				1626	next chunk of data */
				1627	if (endpos < line_len) {
				1628	remaining = PyUnicode_FromUnicode(
				1629	ptr + endpos, line_len - endpos);
				1630	if (remaining == NULL)
				1631	goto error;
				1632	}
				1633	Py_CLEAR(line);
				1634	/* We have consumed the buffer */
				1635	TextIOWrapper_set_decoded_chars(self, NULL);
				1636	}
				1637
				1638	if (line != NULL) {
				1639	/* Our line ends in the current buffer */
				1640	self->decoded_chars_used = endpos - offset_to_buffer;
				1641	if (start > 0 \|\| endpos < PyUnicode_GET_SIZE(line)) {
				1642	if (start == 0 && Py_REFCNT(line) == 1) {
				1643	if (PyUnicode_Resize(&line, endpos) < 0)
				1644	goto error;
				1645	}
				1646	else {
				1647	PyObject *s = PyUnicode_FromUnicode(
				1648	PyUnicode_AS_UNICODE(line) + start, endpos - start);
				1649	Py_CLEAR(line);
				1650	if (s == NULL)
				1651	goto error;
				1652	line = s;
				1653	}
				1654	}
				1655	}
				1656	if (remaining != NULL) {
				1657	if (chunks == NULL) {
				1658	chunks = PyList_New(0);
				1659	if (chunks == NULL)
				1660	goto error;
				1661	}
				1662	if (PyList_Append(chunks, remaining) < 0)
				1663	goto error;
				1664	Py_CLEAR(remaining);
				1665	}
				1666	if (chunks != NULL) {
				1667	if (line != NULL && PyList_Append(chunks, line) < 0)
				1668	goto error;
				1669	Py_CLEAR(line);
				1670	line = PyUnicode_Join(_PyIO_empty_str, chunks);
				1671	if (line == NULL)
				1672	goto error;
				1673	Py_DECREF(chunks);
				1674	}
				1675	if (line == NULL)
				1676	line = PyUnicode_FromStringAndSize(NULL, 0);
				1677
				1678	return line;
				1679
				1680	error:
				1681	Py_XDECREF(chunks);
				1682	Py_XDECREF(remaining);
				1683	Py_XDECREF(line);
				1684	return NULL;
				1685	}
				1686
				1687	static PyObject *
				1688	TextIOWrapper_readline(PyTextIOWrapperObject self, PyObject args)
				1689	{
				1690	Py_ssize_t limit = -1;
				1691
				1692	CHECK_INITIALIZED(self);
				1693	if (!PyArg_ParseTuple(args, "\|n:readline", &limit)) {
				1694	return NULL;
				1695	}
				1696	return _TextIOWrapper_readline(self, limit);
				1697	}
				1698
				1699	/* Seek and Tell */
				1700
				1701	typedef struct {
				1702	Py_off_t start_pos;
				1703	int dec_flags;
				1704	int bytes_to_feed;
				1705	int chars_to_skip;
				1706	char need_eof;
				1707	} CookieStruct;
				1708
				1709	/*
				1710	To speed up cookie packing/unpacking, we store the fields in a temporary
				1711	string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
				1712	The following macros define at which offsets in the intermediary byte
				1713	string the various CookieStruct fields will be stored.
				1714	*/
				1715
				1716	#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
				1717
				1718	#if defined(WORDS_BIGENDIAN)
				1719
				1720	# define IS_LITTLE_ENDIAN 0
				1721
				1722	/* We want the least significant byte of start_pos to also be the least
				1723	significant byte of the cookie, which means that in big-endian mode we
				1724	must copy the fields in reverse order. */
				1725
				1726	# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
				1727	# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
				1728	# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
				1729	# define OFF_CHARS_TO_SKIP (sizeof(char))
				1730	# define OFF_NEED_EOF 0
				1731
				1732	#else
				1733
				1734	# define IS_LITTLE_ENDIAN 1
				1735
				1736	/* Little-endian mode: the least significant byte of start_pos will
				1737	naturally end up the least significant byte of the cookie. */
				1738
				1739	# define OFF_START_POS 0
				1740	# define OFF_DEC_FLAGS (sizeof(Py_off_t))
				1741	# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
				1742	# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
				1743	# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
				1744
				1745	#endif
				1746
				1747	static int
				1748	TextIOWrapper_parseCookie(CookieStruct cookie, PyObject cookieObj)
				1749	{
				1750	unsigned char buffer[COOKIE_BUF_LEN];
				1751	PyLongObject cookieLong = (PyLongObject )PyNumber_Long(cookieObj);
				1752	if (cookieLong == NULL)
				1753	return -1;
				1754
				1755	if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
				1756	IS_LITTLE_ENDIAN, 0) < 0) {
				1757	Py_DECREF(cookieLong);
				1758	return -1;
				1759	}
				1760	Py_DECREF(cookieLong);
				1761
Antoine Pitrou	2db74c2	2009-03-06 21:49:02 +0000	[diff] [blame]	1762	memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
				1763	memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
				1764	memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
				1765	memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
				1766	memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1767
				1768	return 0;
				1769	}
				1770
				1771	static PyObject *
				1772	TextIOWrapper_buildCookie(CookieStruct *cookie)
				1773	{
				1774	unsigned char buffer[COOKIE_BUF_LEN];
				1775
Antoine Pitrou	2db74c2	2009-03-06 21:49:02 +0000	[diff] [blame]	1776	memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
				1777	memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
				1778	memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
				1779	memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
				1780	memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	1781
				1782	return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
				1783	}
				1784	#undef IS_LITTLE_ENDIAN
				1785
				1786	static int
				1787	_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
				1788	CookieStruct *cookie)
				1789	{
				1790	PyObject *res;
				1791	/* When seeking to the start of the stream, we call decoder.reset()
				1792	rather than decoder.getstate().
				1793	This is for a few decoders such as utf-16 for which the state value
				1794	at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
				1795	utf-16, that we are expecting a BOM).
				1796	*/
				1797	if (cookie->start_pos == 0 && cookie->dec_flags == 0)
				1798	res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
				1799	else
				1800	res = PyObject_CallMethod(self->decoder, "setstate",
				1801	"((yi))", "", cookie->dec_flags);
				1802	if (res == NULL)
				1803	return -1;
				1804	Py_DECREF(res);
				1805	return 0;
				1806	}
				1807
				1808	static PyObject *
				1809	TextIOWrapper_seek(PyTextIOWrapperObject self, PyObject args)
				1810	{
				1811	PyObject cookieObj, posobj;
				1812	CookieStruct cookie;
				1813	int whence = 0;
				1814	static PyObject *zero = NULL;
				1815	PyObject *res;
				1816	int cmp;
				1817
				1818	CHECK_INITIALIZED(self);
				1819
				1820	if (zero == NULL) {
				1821	zero = PyLong_FromLong(0L);
				1822	if (zero == NULL)
				1823	return NULL;
				1824	}
				1825
				1826	if (!PyArg_ParseTuple(args, "O\|i:seek", &cookieObj, &whence))
				1827	return NULL;
				1828	CHECK_CLOSED(self);
				1829
				1830	Py_INCREF(cookieObj);
				1831
				1832	if (!self->seekable) {
				1833	PyErr_SetString(PyExc_IOError,
				1834	"underlying stream is not seekable");
				1835	goto fail;
				1836	}
				1837
				1838	if (whence == 1) {
				1839	/* seek relative to current position */
				1840	cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
				1841	if (cmp < 0)
				1842	goto fail;
				1843
				1844	if (cmp == 0) {
				1845	PyErr_SetString(PyExc_IOError,
				1846	"can't do nonzero cur-relative seeks");
				1847	goto fail;
				1848	}
				1849
				1850	/* Seeking to the current position should attempt to
				1851	* sync the underlying buffer with the current position.
				1852	*/
				1853	Py_DECREF(cookieObj);
				1854	cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
				1855	if (cookieObj == NULL)
				1856	goto fail;
				1857	}
				1858	else if (whence == 2) {
				1859	/* seek relative to end of file */
				1860
				1861	cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
				1862	if (cmp < 0)
				1863	goto fail;
				1864
				1865	if (cmp == 0) {
				1866	PyErr_SetString(PyExc_IOError,
				1867	"can't do nonzero end-relative seeks");
				1868	goto fail;
				1869	}
				1870
				1871	res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
				1872	if (res == NULL)
				1873	goto fail;
				1874	Py_DECREF(res);
				1875
				1876	TextIOWrapper_set_decoded_chars(self, NULL);
				1877	Py_CLEAR(self->snapshot);
				1878	if (self->decoder) {
				1879	res = PyObject_CallMethod(self->decoder, "reset", NULL);
				1880	if (res == NULL)
				1881	goto fail;
				1882	Py_DECREF(res);
				1883	}
				1884
				1885	res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
				1886	Py_XDECREF(cookieObj);
				1887	return res;
				1888	}
				1889	else if (whence != 0) {
				1890	PyErr_Format(PyExc_ValueError,
				1891	"invalid whence (%d, should be 0, 1 or 2)", whence);
				1892	goto fail;
				1893	}
				1894
				1895	cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
				1896	if (cmp < 0)
				1897	goto fail;
				1898
				1899	if (cmp == 1) {
				1900	PyErr_Format(PyExc_ValueError,
				1901	"negative seek position %R", cookieObj);
				1902	goto fail;
				1903	}
				1904
				1905	res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
				1906	if (res == NULL)
				1907	goto fail;
				1908	Py_DECREF(res);
				1909
				1910	/* The strategy of seek() is to go back to the safe start point
				1911	* and replay the effect of read(chars_to_skip) from there.
				1912	*/
				1913	if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
				1914	goto fail;
				1915
				1916	/* Seek back to the safe start point. */
				1917	posobj = PyLong_FromOff_t(cookie.start_pos);
				1918	if (posobj == NULL)
				1919	goto fail;
				1920	res = PyObject_CallMethodObjArgs(self->buffer,
				1921	_PyIO_str_seek, posobj, NULL);
				1922	Py_DECREF(posobj);
				1923	if (res == NULL)
				1924	goto fail;
				1925	Py_DECREF(res);
				1926
				1927	TextIOWrapper_set_decoded_chars(self, NULL);
				1928	Py_CLEAR(self->snapshot);
				1929
				1930	/* Restore the decoder to its state from the safe start point. */
				1931	if (self->decoder) {
				1932	if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
				1933	goto fail;
				1934	}
				1935
				1936	if (cookie.chars_to_skip) {
				1937	/* Just like _read_chunk, feed the decoder and save a snapshot. */
				1938	PyObject *input_chunk = PyObject_CallMethod(
				1939	self->buffer, "read", "i", cookie.bytes_to_feed);
				1940	PyObject *decoded;
				1941
				1942	if (input_chunk == NULL)
				1943	goto fail;
				1944
				1945	assert (PyBytes_Check(input_chunk));
				1946
				1947	self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
				1948	if (self->snapshot == NULL) {
				1949	Py_DECREF(input_chunk);
				1950	goto fail;
				1951	}
				1952
				1953	decoded = PyObject_CallMethod(self->decoder, "decode",
				1954	"Oi", input_chunk, (int)cookie.need_eof);
				1955
				1956	if (decoded == NULL)
				1957	goto fail;
				1958
				1959	TextIOWrapper_set_decoded_chars(self, decoded);
				1960
				1961	/* Skip chars_to_skip of the decoded characters. */
				1962	if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
				1963	PyErr_SetString(PyExc_IOError, "can't restore logical file position");
				1964	goto fail;
				1965	}
				1966	self->decoded_chars_used = cookie.chars_to_skip;
				1967	}
				1968	else {
				1969	self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
				1970	if (self->snapshot == NULL)
				1971	goto fail;
				1972	}
				1973
				1974	return cookieObj;
				1975	fail:
				1976	Py_XDECREF(cookieObj);
				1977	return NULL;
				1978
				1979	}
				1980
				1981	static PyObject *
				1982	TextIOWrapper_tell(PyTextIOWrapperObject self, PyObject args)
				1983	{
				1984	PyObject *res;
				1985	PyObject *posobj = NULL;
				1986	CookieStruct cookie = {0,0,0,0,0};
				1987	PyObject *next_input;
				1988	Py_ssize_t chars_to_skip, chars_decoded;
				1989	PyObject *saved_state = NULL;
				1990	char input, input_end;
				1991
				1992	CHECK_INITIALIZED(self);
				1993	CHECK_CLOSED(self);
				1994
				1995	if (!self->seekable) {
				1996	PyErr_SetString(PyExc_IOError,
				1997	"underlying stream is not seekable");
				1998	goto fail;
				1999	}
				2000	if (!self->telling) {
				2001	PyErr_SetString(PyExc_IOError,
				2002	"telling position disabled by next() call");
				2003	goto fail;
				2004	}
				2005
				2006	if (_TextIOWrapper_writeflush(self) < 0)
				2007	return NULL;
				2008	res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
				2009	if (res == NULL)
				2010	goto fail;
				2011	Py_DECREF(res);
				2012
				2013	posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
				2014	if (posobj == NULL)
				2015	goto fail;
				2016
				2017	if (self->decoder == NULL \|\| self->snapshot == NULL) {
				2018	assert (self->decoded_chars == NULL \|\| PyUnicode_GetSize(self->decoded_chars) == 0);
				2019	return posobj;
				2020	}
				2021
				2022	#if defined(HAVE_LARGEFILE_SUPPORT)
				2023	cookie.start_pos = PyLong_AsLongLong(posobj);
				2024	#else
				2025	cookie.start_pos = PyLong_AsLong(posobj);
				2026	#endif
				2027	if (PyErr_Occurred())
				2028	goto fail;
				2029
				2030	/* Skip backward to the snapshot point (see _read_chunk). */
				2031	if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
				2032	goto fail;
				2033
				2034	assert (PyBytes_Check(next_input));
				2035
				2036	cookie.start_pos -= PyBytes_GET_SIZE(next_input);
				2037
				2038	/* How many decoded characters have been used up since the snapshot? */
				2039	if (self->decoded_chars_used == 0) {
				2040	/* We haven't moved from the snapshot point. */
				2041	Py_DECREF(posobj);
				2042	return TextIOWrapper_buildCookie(&cookie);
				2043	}
				2044
				2045	chars_to_skip = self->decoded_chars_used;
				2046
				2047	/* Starting from the snapshot position, we will walk the decoder
				2048	* forward until it gives us enough decoded characters.
				2049	*/
				2050	saved_state = PyObject_CallMethodObjArgs(self->decoder,
				2051	_PyIO_str_getstate, NULL);
				2052	if (saved_state == NULL)
				2053	goto fail;
				2054
				2055	/* Note our initial start point. */
				2056	if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
				2057	goto fail;
				2058
				2059	/* Feed the decoder one byte at a time. As we go, note the
				2060	* nearest "safe start point" before the current location
				2061	* (a point where the decoder has nothing buffered, so seek()
				2062	* can safely start from there and advance to this location).
				2063	*/
				2064	chars_decoded = 0;
				2065	input = PyBytes_AS_STRING(next_input);
				2066	input_end = input + PyBytes_GET_SIZE(next_input);
				2067	while (input < input_end) {
				2068	PyObject *state;
				2069	char *dec_buffer;
				2070	Py_ssize_t dec_buffer_len;
				2071	int dec_flags;
				2072
				2073	PyObject *decoded = PyObject_CallMethod(
				2074	self->decoder, "decode", "y#", input, 1);
				2075	if (decoded == NULL)
				2076	goto fail;
				2077	assert (PyUnicode_Check(decoded));
				2078	chars_decoded += PyUnicode_GET_SIZE(decoded);
				2079	Py_DECREF(decoded);
				2080
				2081	cookie.bytes_to_feed += 1;
				2082
				2083	state = PyObject_CallMethodObjArgs(self->decoder,
				2084	_PyIO_str_getstate, NULL);
				2085	if (state == NULL)
				2086	goto fail;
				2087	if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
				2088	Py_DECREF(state);
				2089	goto fail;
				2090	}
				2091	Py_DECREF(state);
				2092
				2093	if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
				2094	/* Decoder buffer is empty, so this is a safe start point. */
				2095	cookie.start_pos += cookie.bytes_to_feed;
				2096	chars_to_skip -= chars_decoded;
				2097	cookie.dec_flags = dec_flags;
				2098	cookie.bytes_to_feed = 0;
				2099	chars_decoded = 0;
				2100	}
				2101	if (chars_decoded >= chars_to_skip)
				2102	break;
				2103	input++;
				2104	}
				2105	if (input == input_end) {
				2106	/* We didn't get enough decoded data; signal EOF to get more. */
				2107	PyObject *decoded = PyObject_CallMethod(
				2108	self->decoder, "decode", "yi", "", /* final = */ 1);
				2109	if (decoded == NULL)
				2110	goto fail;
				2111	assert (PyUnicode_Check(decoded));
				2112	chars_decoded += PyUnicode_GET_SIZE(decoded);
				2113	Py_DECREF(decoded);
				2114	cookie.need_eof = 1;
				2115
				2116	if (chars_decoded < chars_to_skip) {
				2117	PyErr_SetString(PyExc_IOError,
				2118	"can't reconstruct logical file position");
				2119	goto fail;
				2120	}
				2121	}
				2122
				2123	/* finally */
				2124	Py_XDECREF(posobj);
				2125	res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
				2126	Py_DECREF(saved_state);
				2127	if (res == NULL)
				2128	return NULL;
				2129	Py_DECREF(res);
				2130
				2131	/* The returned cookie corresponds to the last safe start point. */
				2132	cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
				2133	return TextIOWrapper_buildCookie(&cookie);
				2134
				2135	fail:
				2136	Py_XDECREF(posobj);
				2137	if (saved_state) {
				2138	PyObject type, value, *traceback;
				2139	PyErr_Fetch(&type, &value, &traceback);
				2140
				2141	res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
				2142	Py_DECREF(saved_state);
				2143	if (res == NULL)
				2144	return NULL;
				2145	Py_DECREF(res);
				2146
				2147	PyErr_Restore(type, value, traceback);
				2148	}
				2149	return NULL;
				2150	}
				2151
				2152	static PyObject *
				2153	TextIOWrapper_truncate(PyTextIOWrapperObject self, PyObject args)
				2154	{
				2155	PyObject *pos = Py_None;
				2156	PyObject *res;
				2157
				2158	CHECK_INITIALIZED(self)
				2159	if (!PyArg_ParseTuple(args, "\|O:truncate", &pos)) {
				2160	return NULL;
				2161	}
				2162
				2163	res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
				2164	if (res == NULL)
				2165	return NULL;
				2166	Py_DECREF(res);
				2167
				2168	if (pos != Py_None) {
				2169	res = PyObject_CallMethodObjArgs((PyObject *) self,
				2170	_PyIO_str_seek, pos, NULL);
				2171	if (res == NULL)
				2172	return NULL;
				2173	Py_DECREF(res);
				2174	}
				2175
				2176	return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
				2177	}
				2178
Benjamin Peterson	c4c0eae	2009-03-09 00:07:03 +0000	[diff] [blame]	2179	static PyObject *
				2180	TextIOWrapper_repr(PyTextIOWrapperObject *self)
				2181	{
				2182	CHECK_INITIALIZED(self);
				2183	return PyUnicode_FromFormat("<TextIOWrapper encoding=%S>", self->encoding);
				2184	}
				2185
				2186
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	2187	/* Inquiries */
				2188
				2189	static PyObject *
				2190	TextIOWrapper_fileno(PyTextIOWrapperObject self, PyObject args)
				2191	{
				2192	CHECK_INITIALIZED(self);
				2193	return PyObject_CallMethod(self->buffer, "fileno", NULL);
				2194	}
				2195
				2196	static PyObject *
				2197	TextIOWrapper_seekable(PyTextIOWrapperObject self, PyObject args)
				2198	{
				2199	CHECK_INITIALIZED(self);
				2200	return PyObject_CallMethod(self->buffer, "seekable", NULL);
				2201	}
				2202
				2203	static PyObject *
				2204	TextIOWrapper_readable(PyTextIOWrapperObject self, PyObject args)
				2205	{
				2206	CHECK_INITIALIZED(self);
				2207	return PyObject_CallMethod(self->buffer, "readable", NULL);
				2208	}
				2209
				2210	static PyObject *
				2211	TextIOWrapper_writable(PyTextIOWrapperObject self, PyObject args)
				2212	{
				2213	CHECK_INITIALIZED(self);
				2214	return PyObject_CallMethod(self->buffer, "writable", NULL);
				2215	}
				2216
				2217	static PyObject *
				2218	TextIOWrapper_isatty(PyTextIOWrapperObject self, PyObject args)
				2219	{
				2220	CHECK_INITIALIZED(self);
				2221	return PyObject_CallMethod(self->buffer, "isatty", NULL);
				2222	}
				2223
				2224	static PyObject *
				2225	TextIOWrapper_flush(PyTextIOWrapperObject self, PyObject args)
				2226	{
				2227	CHECK_INITIALIZED(self);
				2228	CHECK_CLOSED(self);
				2229	self->telling = self->seekable;
				2230	if (_TextIOWrapper_writeflush(self) < 0)
				2231	return NULL;
				2232	return PyObject_CallMethod(self->buffer, "flush", NULL);
				2233	}
				2234
				2235	static PyObject *
				2236	TextIOWrapper_close(PyTextIOWrapperObject self, PyObject args)
				2237	{
				2238	PyObject *res;
				2239	CHECK_INITIALIZED(self);
				2240	res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
				2241	if (res == NULL) {
				2242	/* If flush() fails, just give up */
				2243	PyErr_Clear();
				2244	}
				2245	else
				2246	Py_DECREF(res);
				2247
				2248	return PyObject_CallMethod(self->buffer, "close", NULL);
				2249	}
				2250
				2251	static PyObject *
				2252	TextIOWrapper_iternext(PyTextIOWrapperObject *self)
				2253	{
				2254	PyObject *line;
				2255
				2256	CHECK_INITIALIZED(self);
				2257
				2258	self->telling = 0;
				2259	if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
				2260	/* Skip method call overhead for speed */
				2261	line = _TextIOWrapper_readline(self, -1);
				2262	}
				2263	else {
				2264	line = PyObject_CallMethodObjArgs((PyObject *)self,
				2265	_PyIO_str_readline, NULL);
				2266	if (line && !PyUnicode_Check(line)) {
				2267	PyErr_Format(PyExc_IOError,
				2268	"readline() should have returned an str object, "
				2269	"not '%.200s'", Py_TYPE(line)->tp_name);
				2270	Py_DECREF(line);
				2271	return NULL;
				2272	}
				2273	}
				2274
				2275	if (line == NULL)
				2276	return NULL;
				2277
				2278	if (PyUnicode_GET_SIZE(line) == 0) {
				2279	/* Reached EOF or would have blocked */
				2280	Py_DECREF(line);
				2281	Py_CLEAR(self->snapshot);
				2282	self->telling = self->seekable;
				2283	return NULL;
				2284	}
				2285
				2286	return line;
				2287	}
				2288
				2289	static PyObject *
				2290	TextIOWrapper_name_get(PyTextIOWrapperObject self, void context)
				2291	{
				2292	CHECK_INITIALIZED(self);
				2293	return PyObject_GetAttrString(self->buffer, "name");
				2294	}
				2295
				2296	static PyObject *
				2297	TextIOWrapper_closed_get(PyTextIOWrapperObject self, void context)
				2298	{
				2299	CHECK_INITIALIZED(self);
				2300	return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
				2301	}
				2302
				2303	static PyObject *
				2304	TextIOWrapper_newlines_get(PyTextIOWrapperObject self, void context)
				2305	{
				2306	PyObject *res;
				2307	CHECK_INITIALIZED(self);
				2308	if (self->decoder == NULL)
				2309	Py_RETURN_NONE;
				2310	res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
				2311	if (res == NULL) {
				2312	PyErr_Clear();
				2313	Py_RETURN_NONE;
				2314	}
				2315	return res;
				2316	}
				2317
				2318	static PyObject *
				2319	TextIOWrapper_chunk_size_get(PyTextIOWrapperObject self, void context)
				2320	{
				2321	CHECK_INITIALIZED(self);
				2322	return PyLong_FromSsize_t(self->chunk_size);
				2323	}
				2324
				2325	static int
				2326	TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
				2327	PyObject arg, void context)
				2328	{
				2329	Py_ssize_t n;
				2330	CHECK_INITIALIZED_INT(self);
				2331	n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
				2332	if (n == -1 && PyErr_Occurred())
				2333	return -1;
				2334	if (n <= 0) {
				2335	PyErr_SetString(PyExc_ValueError,
				2336	"a strictly positive integer is required");
				2337	return -1;
				2338	}
				2339	self->chunk_size = n;
				2340	return 0;
				2341	}
				2342
				2343	static PyMethodDef TextIOWrapper_methods[] = {
				2344	{"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
				2345	{"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
				2346	{"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
				2347	{"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
				2348	{"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
				2349
				2350	{"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
				2351	{"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
				2352	{"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
				2353	{"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
				2354	{"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
				2355
				2356	{"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
				2357	{"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
				2358	{"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
				2359	{NULL, NULL}
				2360	};
				2361
				2362	static PyMemberDef TextIOWrapper_members[] = {
				2363	{"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
				2364	{"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
				2365	{"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
				2366	{NULL}
				2367	};
				2368
				2369	static PyGetSetDef TextIOWrapper_getset[] = {
				2370	{"name", (getter)TextIOWrapper_name_get, NULL, NULL},
				2371	{"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
				2372	/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
				2373	*/
				2374	{"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
				2375	{"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
				2376	(setter)TextIOWrapper_chunk_size_set, NULL},
				2377	{0}
				2378	};
				2379
				2380	PyTypeObject PyTextIOWrapper_Type = {
				2381	PyVarObject_HEAD_INIT(NULL, 0)
				2382	"_io.TextIOWrapper", /tp_name/
				2383	sizeof(PyTextIOWrapperObject), /tp_basicsize/
				2384	0, /tp_itemsize/
				2385	(destructor)TextIOWrapper_dealloc, /tp_dealloc/
				2386	0, /tp_print/
				2387	0, /tp_getattr/
Benjamin Peterson	c4c0eae	2009-03-09 00:07:03 +0000	[diff] [blame]	2388	0, /tps_etattr/
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	2389	0, /tp_compare /
Benjamin Peterson	c4c0eae	2009-03-09 00:07:03 +0000	[diff] [blame]	2390	(reprfunc)TextIOWrapper_repr,/tp_repr/
Benjamin Peterson	4fa88fa	2009-03-04 00:14:51 +0000	[diff] [blame]	2391	0, /tp_as_number/
				2392	0, /tp_as_sequence/
				2393	0, /tp_as_mapping/
				2394	0, /tp_hash /
				2395	0, /tp_call/
				2396	0, /tp_str/
				2397	0, /tp_getattro/
				2398	0, /tp_setattro/
				2399	0, /tp_as_buffer/
				2400	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE
				2401	\| Py_TPFLAGS_HAVE_GC, /tp_flags/
				2402	TextIOWrapper_doc, /* tp_doc */
				2403	(traverseproc)TextIOWrapper_traverse, /* tp_traverse */
				2404	(inquiry)TextIOWrapper_clear, /* tp_clear */
				2405	0, /* tp_richcompare */
				2406	offsetof(PyTextIOWrapperObject, weakreflist), /tp_weaklistoffset/
				2407	0, /* tp_iter */
				2408	(iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
				2409	TextIOWrapper_methods, /* tp_methods */
				2410	TextIOWrapper_members, /* tp_members */
				2411	TextIOWrapper_getset, /* tp_getset */
				2412	0, /* tp_base */
				2413	0, /* tp_dict */
				2414	0, /* tp_descr_get */
				2415	0, /* tp_descr_set */
				2416	offsetof(PyTextIOWrapperObject, dict), /tp_dictoffset/
				2417	(initproc)TextIOWrapper_init, /* tp_init */
				2418	0, /* tp_alloc */
				2419	PyType_GenericNew, /* tp_new */
				2420	};