Merged revisions 75931 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r75931 | benjamin.peterson | 2009-10-28 20:49:07 -0500 (Wed, 28 Oct 2009) | 5 lines

  do a backport of r75928

  The added test does not fail without the patch, but we still fix the issue of
  surrogates being used in wide builds where they should not be.
........
diff --git a/Python/ast.c b/Python/ast.c
index a3fdd89..b89e29c 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3248,10 +3248,11 @@
                 u = NULL;
         } else {
                 /* check for integer overflow */
-                if (len > PY_SIZE_MAX / 4)
+                if (len > PY_SIZE_MAX / 6)
                         return NULL;
-                /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
-                u = PyString_FromStringAndSize((char *)NULL, len * 4);
+		/* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+		   "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+                u = PyString_FromStringAndSize((char *)NULL, len * 6);
                 if (u == NULL)
                         return NULL;
                 p = buf = PyString_AsString(u);
@@ -3268,19 +3269,21 @@
                                 PyObject *w;
                                 char *r;
                                 Py_ssize_t rn, i;
-                                w = decode_utf8(c, &s, end, "utf-16-be");
+                                w = decode_utf8(c, &s, end, "utf-32-be");
                                 if (w == NULL) {
                                         Py_DECREF(u);
                                         return NULL;
                                 }
                                 r = PyString_AsString(w);
                                 rn = PyString_Size(w);
-                                assert(rn % 2 == 0);
-                                for (i = 0; i < rn; i += 2) {
-                                        sprintf(p, "\\u%02x%02x",
+                                assert(rn % 4 == 0);
+                                for (i = 0; i < rn; i += 4) {
+                                        sprintf(p, "\\U%02x%02x%02x%02x",
                                                 r[i + 0] & 0xFF,
-                                                r[i + 1] & 0xFF);
-                                        p += 6;
+                                                r[i + 1] & 0xFF,
+						r[i + 2] & 0xFF,
+						r[i + 3] & 0xFF);
+                                        p += 10;
                                 }
                                 Py_DECREF(w);
                         } else {