bpo-34454: fix .fromisoformat() methods crashing on inputs with surrogate code points (GH-8862)

The current C implementations **crash** if the input includes a surrogate Unicode code point, which cannot be encoded in UTF-8.

Important notes:

1. It is possible to pass a string that cannot be encoded in UTF-8 as a separator to the `.isoformat()` methods.
2. The pure-Python `datetime.fromisoformat()` implementation accepts strings with a surrogate as the separator.

In `datetime.fromisoformat()`, in the special case of a separator that cannot be encoded in UTF-8, this implementation takes a performance hit by making a copy of the input string and replacing the separator with 'T'.

Co-authored-by: Alexey Izbyshev <izbyshev@ispras.ru>
Co-authored-by: Paul Ganssle <paul@ganssle.io>
(cherry picked from commit 096329f0b2bf5e3f0a16363aa631d993ce078737)

Co-authored-by: Paul Ganssle <pganssle@users.noreply.github.com>

commit: 89b1654e0bc7bc69709dca86dd4c92eb7122ac7e [log] [tgz]
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Thu Aug 23 11:54:33 2018 -0400
committer: GitHub <noreply@github.com> Thu Aug 23 11:54:33 2018 -0400
tree: 2fb3592eebf21f08fc9374a8045ae4d4efa0353e
parent: 1f7d0470c26aa6fa2b924700f895cd6ee0a575fa [diff] [blame]
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index f647a23..9c6e71c 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py

@@ -1667,6 +1667,7 @@
         # Test that fromisoformat() fails on invalid values
         bad_strs = [
             '',                 # Empty string
+            '\ud800',           # bpo-34454: Surrogate code point
             '009-03-04',        # Not 10 characters
             '123456789',        # Not a date
             '200a-12-04',       # Invalid character in year
@@ -1675,6 +1676,7 @@
             '2009-01-32',       # Invalid day
             '2009-02-29',       # Invalid leap day
             '20090228',         # Valid ISO8601 output not from isoformat()
+            '2009\ud80002\ud80028',     # Separators are surrogate codepoints
         ]
 
         for bad_str in bad_strs:
@@ -2587,7 +2589,8 @@
             ' ', 'T', '\u007f',     # 1-bit widths
             '\u0080', 'ʁ',          # 2-bit widths
             'ᛇ', '時',               # 3-bit widths
-            '🐍'                     # 4-bit widths
+            '🐍',                    # 4-bit widths
+            '\ud800',               # bpo-34454: Surrogate code point
         ]
 
         for sep in separators:
@@ -2639,6 +2642,7 @@
         # Test that fromisoformat() fails on invalid values
         bad_strs = [
             '',                             # Empty string
+            '\ud800',                       # bpo-34454: Surrogate code point
             '2009.04-19T03',                # Wrong first separator
             '2009-04.19T03',                # Wrong second separator
             '2009-04-19T0a',                # Invalid hours
@@ -2652,6 +2656,8 @@
             '2009-04-19T03:15:45.123456+24:30',    # Invalid time zone offset
             '2009-04-19T03:15:45.123456-24:30',    # Invalid negative offset
             '2009-04-10ᛇᛇᛇᛇᛇ12:15',         # Too many unicode separators
+            '2009-04\ud80010T12:15',        # Surrogate char in date
+            '2009-04-10T12\ud80015',        # Surrogate char in time
             '2009-04-19T1',                 # Incomplete hours
             '2009-04-19T12:3',              # Incomplete minutes
             '2009-04-19T12:30:4',           # Incomplete seconds
@@ -3521,6 +3527,7 @@
     def test_fromisoformat_fails(self):
         bad_strs = [
             '',                         # Empty string
+            '12\ud80000',               # Invalid separator - surrogate char
             '12:',                      # Ends on a separator
             '12:30:',                   # Ends on a separator
             '12:30:15.',                # Ends on a separator
commit	89b1654e0bc7bc69709dca86dd4c92eb7122ac7e	[log] [tgz]
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	Thu Aug 23 11:54:33 2018 -0400
committer	GitHub <noreply@github.com>	Thu Aug 23 11:54:33 2018 -0400
tree	2fb3592eebf21f08fc9374a8045ae4d4efa0353e
parent	1f7d0470c26aa6fa2b924700f895cd6ee0a575fa [diff] [blame]