SF patch #470578: Fixes to synchronize unicode() and str()

    This patch implements what we have discussed on python-dev late in
    September: str(obj) and unicode(obj) should behave similarly, while
    the old behaviour is retained for unicode(obj, encoding, errors).

    The patch also adds a new feature with which objects can provide
    unicode(obj) with input data: the __unicode__ method. Currently no
    new tp_unicode slot is implemented; this is left as an option for
    the future.

    Note that PyUnicode_FromEncodedObject() no longer accepts Unicode
    objects as input. The API name already suggests that Unicode
    objects do not belong in the list of acceptable objects and the
    functionality was only needed because
    PyUnicode_FromEncodedObject() was being used directly by
    unicode(). The latter was changed in the discussed way:

    * unicode(obj) calls PyObject_Unicode()
    * unicode(obj, encoding, errors) calls PyUnicode_FromEncodedObject()

    One thing left open to discussion is whether to leave the
    PyUnicode_FromObject() API as a thin API extension on top of
    PyUnicode_FromEncodedObject() or to turn it into a (macro) alias
    for PyObject_Unicode() and deprecate it. Doing so would have some
    surprising consequences though, e.g.  u"abc" + 123 would turn out
    as u"abc123"...

[Marc-Andre didn't have time to check this in before the deadline.  I
hope this is OK, Marc-Andre!  You can still make changes and commit
them on the trunk after the branch has been made, but then please mail
Barry a context diff if you want the change to be merged into the
2.2b1 release branch.  GvR]
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a252587..a29c75b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -395,6 +395,18 @@
 
 PyObject *PyUnicode_FromObject(register PyObject *obj)
 {
+    /* XXX Perhaps we should make this API an alias of
+           PyObject_Unicode() instead ?! */
+    if (PyUnicode_CheckExact(obj)) {
+	Py_INCREF(obj);
+	return obj;
+    }
+    if (PyUnicode_Check(obj)) {
+	/* For a Unicode subtype that's not a Unicode object,
+	   return a true Unicode object with the same data. */
+	return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
+				     PyUnicode_GET_SIZE(obj));
+    }
     return PyUnicode_FromEncodedObject(obj, NULL, "strict");
 }
 
@@ -406,69 +418,49 @@
     int len;
     int owned = 0;
     PyObject *v;
-    int reclevel;
     
     if (obj == NULL) {
 	PyErr_BadInternalCall();
 	return NULL;
     }
 
-    /* Coerce object */
-    for (reclevel = 0; reclevel < 2; reclevel++) {
+#if 0
+    /* For b/w compatibility we also accept Unicode objects provided
+       that no encoding is given and then redirect to PyObject_Unicode() 
+       which then applies the additional logic for Unicode subclasses.
 
+       NOTE: This API should really only be used for objects which
+             represent *encoded* Unicode !
+
+    */
 	if (PyUnicode_Check(obj)) {
 	    if (encoding) {
 		PyErr_SetString(PyExc_TypeError,
 				"decoding Unicode is not supported");
-		goto onError;
+	    return NULL;
 	    }
-	    if (PyUnicode_CheckExact(obj)) {
-		Py_INCREF(obj);
-		v = obj;
+	return PyObject_Unicode(obj);
 	    }
-	    else {
-		/* For a subclass of unicode, return a true unicode object
-		   with the same string value. */
-		v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
-					  PyUnicode_GET_SIZE(obj));
-	    }
-	    goto done;
+#else
+    if (PyUnicode_Check(obj)) {
+	PyErr_SetString(PyExc_TypeError,
+			"decoding Unicode is not supported");
+	return NULL;
 	}
-	else if (PyString_Check(obj)) {
+#endif
+
+    /* Coerce object */
+    if (PyString_Check(obj)) {
 	    s = PyString_AS_STRING(obj);
 	    len = PyString_GET_SIZE(obj);
-	    break;
-	}
-	else {
-	    PyObject *w;
-
-	    /* Try char buffer interface */
-            if (PyObject_AsCharBuffer(obj, &s, &len))
-		PyErr_Clear();
-	    else
-		break;
-    
-	    /* Mimic the behaviour of str(object) if everything else
-    	       fails (see PyObject_Str()); this also covers instances
-    	       which implement __str__. */
-	    if (obj->ob_type->tp_str == NULL)
-		w = PyObject_Repr(obj);
-	    else
-		w = (*obj->ob_type->tp_str)(obj);
-	    if (w == NULL)
-		goto onError;
-	    if (owned) {
-		Py_DECREF(obj);
 	    }
-	    obj = w;
-	    owned = 1;
-	}
-    }
-
-    if (s == NULL) {
+    else if (PyObject_AsCharBuffer(obj, &s, &len)) {
+	/* Overwrite the error message with something more useful in
+	   case of a TypeError. */
+	if (PyErr_ExceptionMatches(PyExc_TypeError))
 	PyErr_Format(PyExc_TypeError,
-		     "coercing to Unicode: __str__ recursion limit exceeded "
-		     "(last type: %.80s)",
+			 "coercing to Unicode: need string or buffer, "
+			 "%.80s found",
 		     obj->ob_type->tp_name);
 	goto onError;
     }
@@ -481,7 +473,6 @@
     else 
 	v = PyUnicode_Decode(s, len, encoding, errors);
 
- done:
     if (owned) {
 	Py_DECREF(obj);
     }
@@ -5653,6 +5644,9 @@
 	    return NULL;
 	if (x == NULL)
 		return (PyObject *)_PyUnicode_New(0);
+	if (encoding == NULL && errors == NULL)
+	    return PyObject_Unicode(x);
+	else
 	return PyUnicode_FromEncodedObject(x, encoding, errors);
 }