Bug #1704793: Return a UTF-16 surrogate pair if unicodedata.lookup cannot represent the result in a single character.

commit: f1e0b3f6307084dc3429bd5a1361a5be7be708bb [log] [tgz]
author: Martin v. Löwis <martin@v.loewis.de> Sat Jul 28 07:03:05 2007 +0000
committer: Martin v. Löwis <martin@v.loewis.de> Sat Jul 28 07:03:05 2007 +0000
tree: 2bae20bba6c1842c7880bfd48a5ba8d2199e1aca
parent: f25e35b9ec2bb87833108c5bb615113a93894dce [diff]
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 0023bf4..574178d 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py

@@ -214,6 +214,9 @@
                 count += 1
         self.assert_(count >= 10) # should have tested at least the ASCII digits
 
+    def test_bug_1704793(self):
+        self.assertEquals(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')
+
 def test_main():
     test.test_support.run_unittest(
         UnicodeMiscTest,

diff --git a/Misc/NEWS b/Misc/NEWS
index 4764daf..55ebfa5 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS

@@ -238,6 +238,9 @@
 Library
 -------
 
+- Bug #1704793: Return UTF-16 pair if unicodedata.lookup cannot
+  represent the result in a single character.
+
 - Bug #978833: Close https sockets by releasing the _ssl object.
 
 - Change location of the package index to pypi.python.org/pypi

diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index fac9adc..a075693 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c

@@ -1077,8 +1077,7 @@
 unicodedata_lookup(PyObject* self, PyObject* args)
 {
     Py_UCS4 code;
-    Py_UNICODE str[1];
-    char errbuf[256];
+    Py_UNICODE str[2];
 
     char* name;
     int namelen;
@@ -1086,24 +1085,20 @@
         return NULL;
 
     if (!_getcode(self, name, namelen, &code)) {
-	/* XXX(nnorwitz): why are we allocating for the error msg?
-		Why not always use snprintf? */
-        char fmt[] = "undefined character name '%s'";
-        char *buf = PyMem_MALLOC(sizeof(fmt) + namelen);
-        if (buf)
-            sprintf(buf, fmt, name);
-        else {
-            buf = errbuf;
-            PyOS_snprintf(buf, sizeof(errbuf), fmt, name);
-        }
-        PyErr_SetString(PyExc_KeyError, buf);
-        if (buf != errbuf)
-        	PyMem_FREE(buf);
+        PyErr_Format(PyExc_KeyError, "undefined character name '%s'",
+                     name);
         return NULL;
     }
 
+#ifndef Py_UNICODE_WIDE
+    if (code >= 0x10000) {
+        str[0] = 0xd800 + ((code - 0x10000) >> 10);
+        str[1] = 0xdc00 + ((code - 0x10000) & 0x3ff);
+        return PyUnicode_FromUnicode(str, 2);
+    }
+#endif
     str[0] = (Py_UNICODE) code;
-    return PyUnicode_FromUnicode(str, 1);
+    return PyUnicode_FromUnicode(str, 1);    
 }
 
 /* XXX Add doc strings. */
commit	f1e0b3f6307084dc3429bd5a1361a5be7be708bb	[log] [tgz]
author	Martin v. Löwis <martin@v.loewis.de>	Sat Jul 28 07:03:05 2007 +0000
committer	Martin v. Löwis <martin@v.loewis.de>	Sat Jul 28 07:03:05 2007 +0000
tree	2bae20bba6c1842c7880bfd48a5ba8d2199e1aca
parent	f25e35b9ec2bb87833108c5bb615113a93894dce [diff]