Manual py3k backport: [svn r74158] Issue #6218: Make io.BytesIO and io.StringIO picklable.
diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c
index 95180a9..0fc8078 100644
--- a/Modules/_io/stringio.c
+++ b/Modules/_io/stringio.c
@@ -539,9 +539,9 @@
     if (self == NULL)
         return NULL;
 
-    self->string_size = 0;
-    self->pos = 0;
-    self->buf_size = 0;
+    /* tp_alloc initializes all the fields to zero. So we don't have to
+       initialize them here. */
+
     self->buf = (Py_UNICODE *)PyMem_Malloc(0);
     if (self->buf == NULL) {
         Py_DECREF(self);
@@ -571,8 +571,8 @@
         return -1;
     }
     if (value && value != Py_None && !PyUnicode_Check(value)) {
-        PyErr_Format(PyExc_ValueError,
-                     "initial_value must be str or None, not %.200s",
+        PyErr_Format(PyExc_TypeError,
+                     "initial_value must be unicode or None, not %.200s",
                      Py_TYPE(value)->tp_name);
         return -1;
     }
@@ -583,6 +583,9 @@
     Py_CLEAR(self->writenl);
     Py_CLEAR(self->decoder);
 
+    assert((newline != NULL && newline_obj != Py_None) ||
+           (newline == NULL && newline_obj == Py_None));
+
     if (newline) {
         self->readnl = PyString_FromString(newline);
         if (self->readnl == NULL)
@@ -654,6 +657,135 @@
     Py_RETURN_TRUE;
 }
 
+/* Pickling support.
+
+   The implementation of __getstate__ is similar to the one for BytesIO,
+   except that we also save the newline parameter. For __setstate__ and unlike
+   BytesIO, we call __init__ to restore the object's state. Doing so allows us
+   to avoid decoding the complex newline state while keeping the object
+   representation compact.
+
+   See comment in bytesio.c regarding why only pickle protocols and onward are
+   supported.
+*/
+
+static PyObject *
+stringio_getstate(stringio *self)
+{
+    PyObject *initvalue = stringio_getvalue(self);
+    PyObject *dict;
+    PyObject *state;
+
+    if (initvalue == NULL)
+        return NULL;
+    if (self->dict == NULL) {
+        Py_INCREF(Py_None);
+        dict = Py_None;
+    }
+    else {
+        dict = PyDict_Copy(self->dict);
+        if (dict == NULL)
+            return NULL;
+    }
+
+    state = Py_BuildValue("(OOnN)", initvalue,
+                          self->readnl ? self->readnl : Py_None,
+                          self->pos, dict);
+    Py_DECREF(initvalue);
+    return state;
+}
+
+static PyObject *
+stringio_setstate(stringio *self, PyObject *state)
+{
+    PyObject *initarg;
+    PyObject *position_obj;
+    PyObject *dict;
+    Py_ssize_t pos;
+
+    assert(state != NULL);
+    CHECK_CLOSED(self);
+
+    /* We allow the state tuple to be longer than 4, because we may need
+       someday to extend the object's state without breaking
+       backward-compatibility. */
+    if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
+        PyErr_Format(PyExc_TypeError,
+                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
+                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
+        return NULL;
+    }
+
+    /* Initialize the object's state. */
+    initarg = PyTuple_GetSlice(state, 0, 2);
+    if (initarg == NULL)
+        return NULL;
+    if (stringio_init(self, initarg, NULL) < 0) {
+        Py_DECREF(initarg);
+        return NULL;
+    }
+    Py_DECREF(initarg);
+
+    /* Restore the buffer state. Even if __init__ did initialize the buffer,
+       we have to initialize it again since __init__ may translates the
+       newlines in the inital_value string. We clearly do not want that
+       because the string value in the state tuple has already been translated
+       once by __init__. So we do not take any chance and replace object's
+       buffer completely. */
+    {
+        Py_UNICODE *buf = PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state, 0));
+        Py_ssize_t bufsize = PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state, 0));
+        if (resize_buffer(self, bufsize) < 0)
+            return NULL;
+        memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE));
+        self->string_size = bufsize;
+    }
+
+    /* Set carefully the position value. Alternatively, we could use the seek
+       method instead of modifying self->pos directly to better protect the
+       object internal state against errneous (or malicious) inputs. */
+    position_obj = PyTuple_GET_ITEM(state, 2);
+    if (!PyIndex_Check(position_obj)) {
+        PyErr_Format(PyExc_TypeError,
+                     "third item of state must be an integer, got %.200s",
+                     Py_TYPE(position_obj)->tp_name);
+        return NULL;
+    }
+    pos = PyNumber_AsSsize_t(position_obj, PyExc_OverflowError);
+    if (pos == -1 && PyErr_Occurred())
+        return NULL;
+    if (pos < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "position value cannot be negative");
+        return NULL;
+    }
+    self->pos = pos;
+
+    /* Set the dictionary of the instance variables. */
+    dict = PyTuple_GET_ITEM(state, 3);
+    if (dict != Py_None) {
+        if (!PyDict_Check(dict)) {
+            PyErr_Format(PyExc_TypeError,
+                         "fourth item of state should be a dict, got a %.200s",
+                         Py_TYPE(dict)->tp_name);
+            return NULL;
+        }
+        if (self->dict) {
+            /* Alternatively, we could replace the internal dictionary
+               completely. However, it seems more practical to just update it. */
+            if (PyDict_Update(self->dict, dict) < 0)
+                return NULL;
+        }
+        else {
+            Py_INCREF(dict);
+            self->dict = dict;
+        }
+    }
+
+    Py_RETURN_NONE;
+}
+
+
 static PyObject *
 stringio_closed(stringio *self, void *context)
 {
@@ -688,10 +820,13 @@
     {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
     {"seek",     (PyCFunction)stringio_seek,     METH_VARARGS, stringio_seek_doc},
     {"write",    (PyCFunction)stringio_write,    METH_O,       stringio_write_doc},
-    
+
     {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
     {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
     {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
+
+    {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
+    {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
     {NULL, NULL}        /* sentinel */
 };