#7643: Unicode codepoints VT (0x0B) and FF (0x0C) are linebreaks according to Unicode Standard Annex #14.
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 9c89896..4904f70 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -24,7 +24,7 @@
def test_method_checksum(self):
h = hashlib.sha1()
- for i in range(65536):
+ for i in range(0x10000):
char = unichr(i)
data = [
# Predicates (single char)
@@ -282,6 +282,17 @@
self.assertEqual(u"\u01c5".title(), u"\u01c5")
self.assertEqual(u"\u01c6".title(), u"\u01c5")
+ def test_linebreak_7643(self):  # Regression test for issue #7643: which code points splitlines() treats as line breaks.
+ for i in range(0x10000):  # every code point from U+0000 through U+FFFF
+ lines = (unichr(i) + u'A').splitlines()  # the trailing u'A' ends up in a second piece only if unichr(i) breaks the line
+ if i in (0x0a, 0x0b, 0x0c, 0x0d, 0x85,  # LF, VT, FF, CR, NEL
+ 0x1c, 0x1d, 0x1e, 0x2028, 0x2029):  # FS, GS, RS, LINE SEPARATOR, PARAGRAPH SEPARATOR
+ self.assertEqual(len(lines), 2,  # expected line breaks must split the string in two
+ r"\u%.4x should be a linebreak" % i)
+ else:
+ self.assertEqual(len(lines), 1,  # every other code point must leave the string as a single line
+ r"\u%.4x should not be a linebreak" % i)
+
def test_main():
test.test_support.run_unittest(
UnicodeMiscTest,