#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept
and return characters from the full Unicode range (Py_UCS4).
The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit()
now return the correct value for large (non-BMP) code points.
- repr() may consider more characters as printable.
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index b572261..bfbb0aa 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -294,6 +294,12 @@
self.assertEqual(len(lines), 1,
r"\u%.4x should not be a linebreak" % i)
+ def test_UCS4(self):
+ # Regression test for #5127: unicodedata lookups must work for code
+ # points outside the BMP, even on a narrow Unicode build.
+ self.assertEqual(self.db.category(u"\U0001012A"), "No")
+ self.assertEqual(self.db.numeric(u"\U0001012A"), 9000)
+
def test_main():
test.support.run_unittest(
UnicodeMiscTest,