Issue #20368: The null character is now passed correctly from Tcl to Python (in
unicode strings only). Improved error handling in variable-related commands.
diff --git a/Lib/lib-tk/test/test_tkinter/test_variables.py b/Lib/lib-tk/test/test_tkinter/test_variables.py
index fe64ed2..a24ea38 100644
--- a/Lib/lib-tk/test/test_tkinter/test_variables.py
+++ b/Lib/lib-tk/test/test_tkinter/test_variables.py
@@ -58,6 +58,14 @@
with self.assertRaises(TypeError):
Variable(self.root, name=123)
+ def test_null_in_name(self):
+ # A variable name containing an embedded null character ('\x00') must
+ # be rejected with ValueError on each of the three entry points below.
+ with self.assertRaises(ValueError):
+ Variable(self.root, name='var\x00name')
+ with self.assertRaises(ValueError):
+ self.root.globalsetvar('var\x00name', "value")
+ with self.assertRaises(ValueError):
+ self.root.setvar('var\x00name', "value")
+
class TestStringVar(TestBase):
@@ -71,6 +79,12 @@
self.root.globalsetvar("name", "value")
self.assertEqual("value", v.get())
+ def test_get_null(self):
+ # A value containing an embedded null character must round-trip through
+ # get(), both when given to the StringVar constructor and when assigned
+ # afterwards with globalsetvar() under the same variable name.
+ v = StringVar(self.root, "abc\x00def", "name")
+ self.assertEqual("abc\x00def", v.get())
+ self.root.globalsetvar("name", "val\x00ue")
+ self.assertEqual("val\x00ue", v.get())
+
class TestIntVar(TestBase):
diff --git a/Lib/test/test_tcl.py b/Lib/test/test_tcl.py
index 8f262d0..2dad5a0 100644
--- a/Lib/test/test_tcl.py
+++ b/Lib/test/test_tcl.py
@@ -139,6 +139,18 @@
self.assertEqual(tcl.eval('set b'),'2')
self.assertEqual(tcl.eval('set c'),'3')
+ def test_evalfile_null_in_result(self):
+ # Write a small Tcl script to TESTFN, evaluate it with evalfile() and
+ # check what eval() returns for variables holding a null character.
+ # In the (non-raw) Python literal below, "\0" puts a real NUL byte into
+ # the script, while "\\0" puts the two characters backslash-zero there.
+ # Both variables are expected to read back with the null character
+ # represented as the byte pair '\xc0\x80'.
+ tcl = self.interp
+ with open(test_support.TESTFN, 'wb') as f:
+ self.addCleanup(test_support.unlink, test_support.TESTFN)
+ f.write("""
+ set a "a\0b"
+ set b "a\\0b"
+ """)
+ tcl.evalfile(test_support.TESTFN)
+ self.assertEqual(tcl.eval('set a'), 'a\xc0\x80b')
+ self.assertEqual(tcl.eval('set b'), 'a\xc0\x80b')
+
def testEvalFileException(self):
tcl = self.interp
filename = "doesnotexists"
@@ -220,6 +232,7 @@
check('"abc"', 'abc')
check('"a\xc2\xbd\xe2\x82\xac"', 'a\xc2\xbd\xe2\x82\xac')
check(r'"a\xbd\u20ac"', 'a\xc2\xbd\xe2\x82\xac')
+ check(r'"a\0b"', 'a\xc0\x80b')
def test_exprdouble(self):
tcl = self.interp
@@ -326,8 +339,17 @@
self.assertEqual(passValue(True), True if self.wantobjects else '1')
self.assertEqual(passValue(False), False if self.wantobjects else '0')
+ self.assertEqual(passValue('string'), 'string')
+ self.assertEqual(passValue('string\xbd'), 'string\xbd')
+ self.assertEqual(passValue('string\xe2\x82\xac'), u'string\u20ac')
self.assertEqual(passValue(u'string'), u'string')
+ self.assertEqual(passValue(u'string\xbd'), u'string\xbd')
self.assertEqual(passValue(u'string\u20ac'), u'string\u20ac')
+ self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
+ self.assertEqual(passValue('str\xc0\x80ing'), 'str\x00ing')
+ self.assertEqual(passValue(u'str\x00ing'), u'str\x00ing')
+ self.assertEqual(passValue(u'str\x00ing\xbd'), u'str\x00ing\xbd')
+ self.assertEqual(passValue(u'str\x00ing\u20ac'), u'str\x00ing\u20ac')
for i in (0, 1, -1, int(2**31-1), int(-2**31)):
self.assertEqual(passValue(i), i if self.wantobjects else str(i))
for f in (0.0, 1.0, -1.0, 1//3, 1/3.0,
@@ -356,14 +378,16 @@
result.append(arg)
return arg
self.interp.createcommand('testfunc', testfunc)
- def check(value, expected, eq=self.assertEqual):
+ def check(value, expected, expected2=None, eq=self.assertEqual):
+ if expected2 is None:
+ expected2 = expected
del result[:]
r = self.interp.call('testfunc', value)
self.assertEqual(len(result), 1)
- self.assertIsInstance(result[0], str)
- eq(result[0], expected)
- self.assertIsInstance(r, str)
- eq(r, expected)
+ self.assertIsInstance(result[0], (str, unicode))
+ eq(result[0], expected2)
+ self.assertIsInstance(r, (str, unicode))
+ eq(r, expected2)
def float_eq(actual, expected):
expected = float(expected)
self.assertAlmostEqual(float(actual), expected,
@@ -376,7 +400,15 @@
check(False, '0')
check('string', 'string')
check('string\xbd', 'string\xbd')
- check('string\u20ac', 'string\u20ac')
+ check('string\xe2\x82\xac', 'string\xe2\x82\xac', u'string\u20ac')
+ check(u'string', u'string')
+ check(u'string\xbd', 'string\xc2\xbd', u'string\xbd')
+ check(u'string\u20ac', 'string\xe2\x82\xac', u'string\u20ac')
+ check('str\xc0\x80ing', 'str\xc0\x80ing', u'str\x00ing')
+ check('str\xc0\x80ing\xe2\x82\xac', 'str\xc0\x80ing\xe2\x82\xac', u'str\x00ing\u20ac')
+ check(u'str\x00ing', 'str\xc0\x80ing', u'str\x00ing')
+ check(u'str\x00ing\xbd', 'str\xc0\x80ing\xc2\xbd', u'str\x00ing\xbd')
+ check(u'str\x00ing\u20ac', 'str\xc0\x80ing\xe2\x82\xac', u'str\x00ing\u20ac')
for i in (0, 1, -1, 2**31-1, -2**31):
check(i, str(i))
for f in (0.0, 1.0, -1.0):
@@ -405,6 +437,7 @@
(u'a\n b\t\r c\n ', ('a', 'b', 'c')),
('a \xe2\x82\xac', ('a', '\xe2\x82\xac')),
(u'a \u20ac', ('a', '\xe2\x82\xac')),
+ ('a\xc0\x80b c\xc0\x80d', ('a\xc0\x80b', 'c\xc0\x80d')),
('a {b c}', ('a', 'b c')),
(r'a b\ c', ('a', 'b c')),
(('a', 'b c'), ('a', 'b c')),
@@ -449,6 +482,8 @@
(u'a\n b\t\r c\n ', ('a', 'b', 'c')),
('a \xe2\x82\xac', ('a', '\xe2\x82\xac')),
(u'a \u20ac', ('a', '\xe2\x82\xac')),
+ ('a\xc0\x80b', 'a\xc0\x80b'),
+ ('a\xc0\x80b c\xc0\x80d', ('a\xc0\x80b', 'c\xc0\x80d')),
('a {b c}', ('a', ('b', 'c'))),
(r'a b\ c', ('a', ('b', 'c'))),
(('a', 'b c'), ('a', ('b', 'c'))),
diff --git a/Misc/NEWS b/Misc/NEWS
index 48a0085..2b1dd07 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -38,6 +38,9 @@
Library
-------
+- Issue #20368: The null character is now passed correctly from Tcl to Python
+  (in unicode strings only). Improved error handling in variable-related
+  commands.
+
- Issue #20435: Fix _pyio.StringIO.getvalue() to take into account newline
translation settings.
diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c
index 0627d60..8d52b89 100644
--- a/Modules/_tkinter.c
+++ b/Modules/_tkinter.c
@@ -456,6 +456,68 @@
+#ifdef Py_USING_UNICODE
+/* Decode the `size` bytes at `s` (a Tcl string) into a new unicode object.
+ *
+ * The bytes are first decoded as UTF-8.  If that fails with
+ * UnicodeDecodeError and the data contains a 0xC0 byte, every "\xc0\x80"
+ * pair (Tcl's encoding of the null character) is replaced by a single
+ * "\0" byte in a temporary copy, and the copy is decoded instead.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+unicode_FromTclStringAndSize(const char *s, Py_ssize_t size)
+{
+    PyObject *r = PyUnicode_DecodeUTF8(s, size, NULL);
+    if (!r && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        /* Tcl encodes null character as \xc0\x80 */
+        if (memchr(s, '\xc0', size)) {
+            char *buf, *q;
+            const char *e = s + size;
+            PyErr_Clear();
+            /* The rewritten copy can only shrink, so `size` bytes suffice. */
+            q = buf = (char *)PyMem_Malloc(size);
+            if (buf == NULL) {
+                /* The decode error was cleared above and PyMem_Malloc()
+                   does not set an exception, so raise MemoryError here
+                   instead of returning NULL with no error set. */
+                return PyErr_NoMemory();
+            }
+            while (s != e) {
+                if (s + 1 != e && s[0] == '\xc0' && s[1] == '\x80') {
+                    *q++ = '\0';
+                    s += 2;
+                }
+                else
+                    *q++ = *s++;
+            }
+            s = buf;
+            size = q - s;
+            r = PyUnicode_DecodeUTF8(s, size, NULL);
+            PyMem_Free(buf);
+        }
+    }
+    return r;
+}
+#endif
+
+/* Convert the `size` bytes at `s` (a Tcl string) into a Python string.
+ *
+ * With unicode support compiled in, data containing any byte with the top
+ * bit set is decoded through unicode_FromTclStringAndSize(); if that fails
+ * the error is discarded and the raw bytes are returned as a str instead.
+ * Pure 7-bit data is always returned as a str.
+ */
+static PyObject *
+fromTclStringAndSize(const char *s, Py_ssize_t size)
+{
+#ifdef Py_USING_UNICODE
+    Py_ssize_t pos = 0;
+
+    /* Skip the leading run of bytes that have the top bit clear. */
+    while (pos < size && !(s[pos] & 0x80))
+        pos++;
+
+    if (pos != size) {
+        /* A non-ASCII byte was found: treat the data as UTF-8. */
+        PyObject *decoded = unicode_FromTclStringAndSize(s, size);
+        if (decoded != NULL)
+            return decoded;
+        /* Decoding failed; fall back to the byte string below. */
+        PyErr_Clear();
+    }
+#endif
+    return PyString_FromStringAndSize(s, size);
+}
+
+/* Convert a NUL-terminated Tcl string; the length is taken with strlen()
+   and the work is delegated to fromTclStringAndSize(). */
+static PyObject *
+fromTclString(const char *s)
+{
+    size_t length = strlen(s);
+    return fromTclStringAndSize(s, length);
+}
+
+
static PyObject *
Split(char *list)
{
@@ -841,27 +903,10 @@
static PyObject *
PyTclObject_string(PyTclObject *self, void *ignored)
{
- char *s;
- int i, len;
if (!self->string) {
- s = Tcl_GetStringFromObj(self->value, &len);
- for (i = 0; i < len; i++)
- if (s[i] & 0x80)
- break;
-#ifdef Py_USING_UNICODE
- if (i == len)
- /* It is an ASCII string. */
- self->string = PyString_FromStringAndSize(s, len);
- else {
- self->string = PyUnicode_DecodeUTF8(s, len, "strict");
- if (!self->string) {
- PyErr_Clear();
- self->string = PyString_FromStringAndSize(s, len);
- }
- }
-#else
- self->string = PyString_FromStringAndSize(s, len);
-#endif
+ int len;
+ char *s = Tcl_GetStringFromObj(self->value, &len);
+ self->string = fromTclStringAndSize(s, len);
if (!self->string)
return NULL;
}
@@ -883,7 +928,7 @@
}
/* XXX Could chache result if it is non-ASCII. */
s = Tcl_GetStringFromObj(self->value, &len);
- return PyUnicode_DecodeUTF8(s, len, "strict");
+ return unicode_FromTclStringAndSize(s, len);
}
#endif
@@ -1022,6 +1067,8 @@
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
+ if (sizeof(Py_UNICODE) == sizeof(Tcl_UniChar))
+ return Tcl_NewUnicodeObj(inbuf, size);
allocsize = ((size_t)size) * sizeof(Tcl_UniChar);
if (allocsize >= size)
outbuf = (Tcl_UniChar*)ckalloc(allocsize);
@@ -1073,30 +1120,7 @@
TkappObject *app = (TkappObject*)tkapp;
if (value->typePtr == NULL) {
- /* If the result contains any bytes with the top bit set,
- it's UTF-8 and we should decode it to Unicode */
-#ifdef Py_USING_UNICODE
- int i;
- char *s = value->bytes;
- int len = value->length;
- for (i = 0; i < len; i++) {
- if (value->bytes[i] & 0x80)
- break;
- }
-
- if (i == value->length)
- result = PyString_FromStringAndSize(s, len);
- else {
- /* Convert UTF-8 to Unicode string */
- result = PyUnicode_DecodeUTF8(s, len, "strict");
- if (result == NULL) {
- PyErr_Clear();
- result = PyString_FromStringAndSize(s, len);
- }
- }
-#else
- result = PyString_FromStringAndSize(value->bytes, value->length);
-#endif
+ result = fromTclStringAndSize(value->bytes, value->length);
return result;
}
@@ -1273,8 +1297,8 @@
Tkapp_CallResult(TkappObject *self)
{
PyObject *res = NULL;
+ Tcl_Obj *value = Tcl_GetObjResult(self->interp);
if(self->wantobjects) {
- Tcl_Obj *value = Tcl_GetObjResult(self->interp);
/* Not sure whether the IncrRef is necessary, but something
may overwrite the interpreter result while we are
converting it. */
@@ -1282,33 +1306,9 @@
res = FromObj((PyObject*)self, value);
Tcl_DecrRefCount(value);
} else {
- const char *s = Tcl_GetStringResult(self->interp);
- const char *p = s;
-
- /* If the result contains any bytes with the top bit set,
- it's UTF-8 and we should decode it to Unicode */
-#ifdef Py_USING_UNICODE
- while (*p != '\0') {
- if (*p & 0x80)
- break;
- p++;
- }
-
- if (*p == '\0')
- res = PyString_FromStringAndSize(s, (int)(p-s));
- else {
- /* Convert UTF-8 to Unicode string */
- p = strchr(p, '\0');
- res = PyUnicode_DecodeUTF8(s, (int)(p-s), "strict");
- if (res == NULL) {
- PyErr_Clear();
- res = PyString_FromStringAndSize(s, (int)(p-s));
- }
- }
-#else
- p = strchr(p, '\0');
- res = PyString_FromStringAndSize(s, (int)(p-s));
-#endif
+ int len;
+ const char *s = Tcl_GetStringFromObj(value, &len);
+ res = fromTclStringAndSize(s, len);
}
return res;
}
@@ -1611,16 +1611,28 @@
+/* Argument converter for a Tcl variable name.
+ *
+ * Accepts a str or a Tcl_Obj wrapper and stores the C string through
+ * `_out` (treated as a char **).  Returns 1 on success; returns 0 with an
+ * exception set when the str is too long, contains a null character, or
+ * the argument is of another type.
+ */
static int
varname_converter(PyObject *in, void *_out)
{
+ char *s;
char **out = (char**)_out;
if (PyString_Check(in)) {
- *out = PyString_AsString(in);
+ /* Reject strings whose length exceeds INT_MAX. */
+ if (PyString_Size(in) > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError, "string is too long");
+ return 0;
+ }
+ s = PyString_AsString(in);
+ /* strlen() stops at the first NUL byte, so a mismatch with the
+ object's size means the name has an embedded null character. */
+ if (strlen(s) != PyString_Size(in)) {
+ PyErr_SetString(PyExc_ValueError, "null character in string");
+ return 0;
+ }
+ *out = s;
return 1;
}
if (PyTclObject_Check(in)) {
*out = PyTclObject_TclString(in);
return 1;
}
- /* XXX: Should give diagnostics. */
+ /* Neither a str nor a Tcl_Obj: report the offending type name. */
+ PyErr_Format(PyExc_TypeError,
+ "must be str or Tcl_Obj, not %.50s",
+ in->ob_type->tp_name);
return 0;
}
@@ -1706,8 +1718,11 @@
PyObject *res = NULL;
Tcl_Obj *newval, *ok;
- if (PyArg_ParseTuple(args, "O&O:setvar",
- varname_converter, &name1, &newValue)) {
+ switch (PyTuple_GET_SIZE(args)) {
+ case 2:
+ if (!PyArg_ParseTuple(args, "O&O:setvar",
+ varname_converter, &name1, &newValue))
+ return NULL;
/* XXX Acquire tcl lock??? */
newval = AsObj(newValue);
if (newval == NULL)
@@ -1723,27 +1738,27 @@
Py_INCREF(res);
}
LEAVE_OVERLAP_TCL
- }
- else {
- PyErr_Clear();
- if (PyArg_ParseTuple(args, "ssO:setvar",
- &name1, &name2, &newValue)) {
- /* XXX must hold tcl lock already??? */
- newval = AsObj(newValue);
- ENTER_TCL
- ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags);
- ENTER_OVERLAP
- if (!ok)
- Tkinter_Error(self);
- else {
- res = Py_None;
- Py_INCREF(res);
- }
- LEAVE_OVERLAP_TCL
- }
- else {
+ break;
+ case 3:
+ if (!PyArg_ParseTuple(args, "ssO:setvar",
+ &name1, &name2, &newValue))
return NULL;
+ /* XXX must hold tcl lock already??? */
+ newval = AsObj(newValue);
+ ENTER_TCL
+ ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags);
+ ENTER_OVERLAP
+ if (!ok)
+ Tkinter_Error(self);
+ else {
+ res = Py_None;
+ Py_INCREF(res);
}
+ LEAVE_OVERLAP_TCL
+ break;
+ default:
+ PyErr_SetString(PyExc_TypeError, "setvar requires 2 to 3 arguments");
+ return NULL;
}
return res;
}
@@ -1783,7 +1798,9 @@
res = FromObj(self, tres);
}
else {
- res = PyString_FromString(Tcl_GetString(tres));
+ int len;
+ char *s = Tcl_GetStringFromObj(tres, &len);
+ res = PyString_FromStringAndSize(s, len);
}
}
LEAVE_OVERLAP_TCL
@@ -1921,7 +1938,7 @@
if (retval == TCL_ERROR)
res = Tkinter_Error(self);
else
- res = Py_BuildValue("s", Tkapp_Result(self));
+ res = PyString_FromString(Tkapp_Result(self));
LEAVE_OVERLAP_TCL
return res;
}
@@ -2158,7 +2175,7 @@
return PythonCmd_Error(interp);
for (i = 0; i < (argc - 1); i++) {
- PyObject *s = PyString_FromString(argv[i + 1]);
+ PyObject *s = fromTclString(argv[i + 1]);
if (!s || PyTuple_SetItem(arg, i, s)) {
Py_DECREF(arg);
return PythonCmd_Error(interp);