Issue #4163: Use unicode-friendly word splitting in the textwrap functions when given a unicode string.
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 1eab90c..c91e242 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -174,7 +174,7 @@
text = ("Python 1.0.0 was released on 1994-01-26. Python 1.0.1 was\n"
"released on 1994-02-15.")
- self.check_wrap(text, 30, ['Python 1.0.0 was released on',
+ self.check_wrap(text, 35, ['Python 1.0.0 was released on',
'1994-01-26. Python 1.0.1 was',
'released on 1994-02-15.'])
self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
@@ -353,6 +353,14 @@
otext = self.wrapper.fill(text)
assert isinstance(otext, unicode)
+ def test_no_split_at_umlaut(self):
+ text = u"Die Empf\xe4nger-Auswahl"
+ self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])
+
+ def test_umlaut_followed_by_dash(self):
+ text = u"aa \xe4\xe4-\xe4\xe4"
+ self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])
+
def test_split(self):
# Ensure that the standard _split() method works as advertised
# in the comments