SF patch 549375:  Compromise PyUnicode_EncodeUTF8
This implements ideas from Marc-Andre, Martin, Guido and me on Python-Dev.

"Short" Unicode strings are encoded into a "big enough" stack buffer,
then exactly as much string space as they turn out to need is allocated
at the end.  This should have speed benefits akin to Martin's "measure
once, allocate once" strategy, but without needing a distinct measuring
pass.

"Long" Unicode strings allocate as much heap space as they could possibly
need (4 bytes per Unicode character), and do a realloc at the end to return
the untouched excess.  Since the overallocation is likely to be substantial,
this shouldn't burden the platform realloc with unusably small excess
blocks.

Also simplified uses of the PyString_xyz functions.  Also added a release-
build check that 4*size doesn't overflow a C int.  Sooner or later, such an
overflow is going to happen.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 54db9aa..c4c8d8d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1138,142 +1138,104 @@
     return NULL;
 }
 
-/* Not used anymore, now that the encoder supports UTF-16
-   surrogates. */
-#if 0
-static
-int utf8_encoding_error(const Py_UNICODE **source,
-			char **dest,
-			const char *errors,
-			const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "UTF-8 encoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "UTF-8 encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
-#endif
-
-/* Allocation strategy: we default to Latin-1, then do one resize
-   whenever we hit an order boundary. The assumption is that
-   characters from higher orders usually occur often enough to warrant
-   this.
+/* Allocation strategy:  if the string is short, convert into a stack buffer
+   and allocate exactly as much space as needed at the end.  Else allocate the
+   maximum possible needed (4 result bytes per Unicode character), and return
+   the excess memory at the end.
 */
-
 PyObject *
 PyUnicode_EncodeUTF8(const Py_UNICODE *s,
 		     int size,
 		     const char *errors)
 {
-    PyObject *v;
-    char *p;
-    int len;
-    int i = 0;
-    long overalloc = 2;
-    int nallocated;  /* overalloc * size; PyString_ adds one more for \0 */
+#define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */
 
-    /* Short-cut for empty strings */
-    if (size == 0)
-	return PyString_FromStringAndSize(NULL, 0);
+    int i;              /* index into s of next input character */
+    PyObject *v;        /* result string object */
+    char *p;            /* next free byte in output buffer */
+    int nallocated;     /* number of result bytes allocated */
+    int nneeded;        /* number of result bytes needed */
+    char stackbuf[MAX_SHORT_UNICHARS * 4];
 
-    nallocated = Py_SAFE_DOWNCAST(overalloc * size, long, int);
-    v = PyString_FromStringAndSize(NULL, nallocated);
-    if (v == NULL)
-        return NULL;
+    assert(s != NULL);
+    assert(size >= 0);
 
-    p = PyString_AS_STRING(v);
+    if (size <= MAX_SHORT_UNICHARS) {
+        /* Write into the stack buffer; nallocated can't overflow.
+         * At the end, we'll allocate exactly as much heap space as it
+         * turns out we need.
+         */
+        nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
+        v = NULL;   /* will allocate after we're done */
+        p = stackbuf;
+    }
+    else {
+        /* Overallocate on the heap, and give the excess back at the end. */
+        nallocated = size * 4;
+        if (nallocated / 4 != size)  /* overflow! */
+            return PyErr_NoMemory();
+        v = PyString_FromStringAndSize(NULL, nallocated);
+        if (v == NULL)
+            return NULL;
+        p = PyString_AS_STRING(v);
+    }
 
-    while (i < size) {
+    for (i = 0; i < size;) {
         Py_UCS4 ch = s[i++];
 
         if (ch < 0x80)
-	    /* Encode ASCII */
+            /* Encode ASCII */
             *p++ = (char) ch;
 
         else if (ch < 0x0800) {
-	    /* Encode Latin-1 */
+            /* Encode U+0080..U+07FF (Latin-1 and beyond) as two bytes */
             *p++ = (char)(0xc0 | (ch >> 6));
             *p++ = (char)(0x80 | (ch & 0x3f));
         }
-
         else {
-	    /* Encode UCS2 Unicode ordinals */
-	    if (ch < 0x10000) {
-
-		/* Special case: check for high surrogate */
-		if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
-		    Py_UCS4 ch2 = s[i];
-		    /* Check for low surrogate and combine the two to
-		       form a UCS4 value */
-		    if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
+            /* Encode UCS2 Unicode ordinals */
+            if (ch < 0x10000) {
+                /* Special case: check for high surrogate */
+                if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
+                    Py_UCS4 ch2 = s[i];
+                    /* Check for low surrogate and combine the two to
+                       form a UCS4 value */
+                    if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
                         ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
-			i++;
-			goto encodeUCS4;
+                        i++;
+                        goto encodeUCS4;
                     }
-		    /* Fall through: handles isolated high surrogates */
+                    /* Fall through: handles isolated high surrogates */
                 }
-
-		if (overalloc < 3) {
-		    len = Py_SAFE_DOWNCAST(p-PyString_AS_STRING(v), long, int);
-                    assert(len <= nallocated);
-		    overalloc = 3;
-                    nallocated = Py_SAFE_DOWNCAST(overalloc * size, long, int);
-		    if (_PyString_Resize(&v, nallocated))
-			goto onError;
-		    p = PyString_AS_STRING(v) + len;
-		}
                 *p++ = (char)(0xe0 | (ch >> 12));
-		*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
-		*p++ = (char)(0x80 | (ch & 0x3f));
-		continue;
-	    }
-
-	    /* Encode UCS4 Unicode ordinals */
-	encodeUCS4:
-	    if (overalloc < 4) {
-                len = Py_SAFE_DOWNCAST(p - PyString_AS_STRING(v), long, int);
-                assert(len <= nallocated);
-		overalloc = 4;
-                nallocated = Py_SAFE_DOWNCAST(overalloc * size, long, int);
-		if (_PyString_Resize(&v, nallocated))
-		    goto onError;
-		p = PyString_AS_STRING(v) + len;
-	    }
-	    *p++ = (char)(0xf0 | (ch >> 18));
-	    *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
-	    *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
-	    *p++ = (char)(0x80 | (ch & 0x3f));
-	}
+                *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+                *p++ = (char)(0x80 | (ch & 0x3f));
+                continue;
+    	    }
+encodeUCS4:
+            /* Encode UCS4 Unicode ordinals */
+            *p++ = (char)(0xf0 | (ch >> 18));
+            *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
+            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+            *p++ = (char)(0x80 | (ch & 0x3f));
+        }
     }
 
-    *p = '\0';
-    len = Py_SAFE_DOWNCAST(p - PyString_AS_STRING(v), long, int);
-    assert(len <= nallocated);
-    if (_PyString_Resize(&v, len))
-	goto onError;
+    if (v == NULL) {
+        /* This was stack allocated. */
+        nneeded = Py_SAFE_DOWNCAST(p - stackbuf, long, int);
+        assert(nneeded <= nallocated);
+        v = PyString_FromStringAndSize(stackbuf, nneeded);
+    }
+    else {
+    	/* Cut back to size actually needed. */
+        nneeded = Py_SAFE_DOWNCAST(p - PyString_AS_STRING(v), long, int);
+        assert(nneeded <= nallocated);
+        _PyString_Resize(&v, nneeded);
+    }
     return v;
 
- onError:
-    Py_DECREF(v);
-    return NULL;
+#undef MAX_SHORT_UNICHARS
 }
 
 PyObject *PyUnicode_AsUTF8String(PyObject *unicode)