Issue #25388: Fixed tokenizer hang when processing undecodable source code
with a null byte.
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index cfc6389..c166ff1 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -3,6 +3,9 @@
import sys
import _ast
from test import test_support
+from test import script_helper
+import os
+import tempfile
import textwrap
class TestSpecifics(unittest.TestCase):
@@ -555,6 +558,19 @@
ast.body = [_ast.BoolOp()]
self.assertRaises(TypeError, compile, ast, '<ast>', 'exec')
+ def test_yet_more_evil_still_undecodable(self):
+ # Issue #25388: null byte then non-ASCII byte must error out, not hang.
+ src = b"#\x00\n#\xfd\n"
+ tmpd = tempfile.mkdtemp()
+ try:
+ fn = os.path.join(tmpd, "bad.py")
+ with open(fn, "wb") as fp:
+ fp.write(src)
+ rc, out, err = script_helper.assert_python_failure(fn)
+ finally:
+ test_support.rmtree(tmpd)
+ self.assertIn(b"Non-ASCII", err)
+
class TestStackSize(unittest.TestCase):
# These tests check that the computed stack size for a code object
diff --git a/Misc/NEWS b/Misc/NEWS
index 43e0418..5d30b1a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #25388: Fixed tokenizer hang when processing undecodable source code
+ with a null byte.
+
- Issue #22995: Default implementation of __reduce__ and __reduce_ex__ now
rejects builtin types with not defined __new__.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 109c0ee..7e4a300 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -169,7 +169,8 @@
tok->decoding_erred = 1;
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
PyMem_FREE(tok->buf);
- tok->buf = NULL;
+ tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+ tok->done = E_DECODE;
return NULL; /* as if it were EOF */
}
@@ -921,7 +922,6 @@
if (tok->buf != NULL)
PyMem_FREE(tok->buf);
tok->buf = newtok;
- tok->line_start = tok->buf;
tok->cur = tok->buf;
tok->line_start = tok->buf;
tok->inp = strchr(tok->buf, '\0');
@@ -944,7 +944,8 @@
}
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
tok) == NULL) {
- tok->done = E_EOF;
+ if (!tok->decoding_erred)
+ tok->done = E_EOF;
done = 1;
}
else {
@@ -978,6 +979,8 @@
return EOF;
}
tok->buf = newbuf;
+ tok->cur = tok->buf + cur;
+ tok->line_start = tok->cur;
tok->inp = tok->buf + curvalid;
tok->end = tok->buf + newsize;
tok->start = curstart < 0 ? NULL :