[3.13] gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909) by miss-islington · Pull Request #120938 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.13] gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909) #120938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Lib/test/test_tcl.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ def testCall(self):
tcl.call('set','a','1')
self.assertEqual(tcl.call('set','a'),'1')

def test_call_passing_null(self):
    """Check that strings with an embedded null survive ``call('set', ...)``.

    Each sample is stored in the Tcl variable ``a`` through ``call`` and
    then read back three ways (``getvar``, ``call`` and ``eval``); every
    read must return the full string, null character included.
    """
    interp = self.interp
    samples = (
        'a\0b',       # ASCII-only
        '\u20ac\0',   # non-ASCII
    )
    for sample in samples:
        interp.call('set', 'a', sample)
        self.assertEqual(interp.getvar('a'), sample)
        self.assertEqual(interp.call('set', 'a'), sample)
        self.assertEqual(interp.eval('set a'), sample)

def testCallException(self):
tcl = self.interp
self.assertRaises(TclError,tcl.call,'set','a')
Expand All @@ -98,6 +110,18 @@ def testSetVar(self):
tcl.setvar('a','1')
self.assertEqual(tcl.eval('set a'),'1')

def test_setvar_passing_null(self):
    """Check that strings with an embedded null survive ``setvar``.

    Each sample is stored in the Tcl variable ``a`` through ``setvar`` and
    then read back three ways (``getvar``, ``call`` and ``eval``); every
    read must return the full string, null character included.
    """
    interp = self.interp
    samples = (
        'a\0b',       # ASCII-only
        '\u20ac\0',   # non-ASCII
    )
    for sample in samples:
        interp.setvar('a', sample)
        self.assertEqual(interp.getvar('a'), sample)
        self.assertEqual(interp.call('set', 'a'), sample)
        self.assertEqual(interp.eval('set a'), sample)

def testSetVarArray(self):
tcl = self.interp
tcl.setvar('a(1)','1')
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_tkinter/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,15 @@ def test_info_patchlevel(self):
self.assertEqual(vi.micro, 0)
self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))

def test_embedded_null(self):
    """Check that an Entry selection keeps embedded null characters.

    Text is inserted at index 0 in two steps, so the second insertion is
    prepended to the first.  After each step the whole content is selected
    and the selection must match the accumulated text exactly.
    """
    entry = tkinter.Entry(self.root)
    steps = (
        # (text inserted at index 0, expected full selection afterwards)
        ('abc\0def', 'abc\x00def'),           # ASCII-only
        ('\u20ac\0', '\u20ac\0abc\x00def'),   # non-ASCII
    )
    for inserted, expected in steps:
        entry.insert(0, inserted)
        entry.selection_range(0, 'end')
        self.assertEqual(entry.selection_get(), expected)


class WmTest(AbstractTkTest, unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix truncation of strings with embedded null characters in some internal
operations in :mod:`tkinter`.
40 changes: 33 additions & 7 deletions Modules/_tkinter.c
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
else
Py_UNREACHABLE();
}
#endif
#endif /* USE_TCL_UNICODE */
const char *s = Tcl_GetStringFromObj(value, &len);
return unicodeFromTclStringAndSize(s, len);
}
Expand Down Expand Up @@ -1018,7 +1018,9 @@ AsObj(PyObject *value)
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
if (PyUnicode_IS_ASCII(value)) {
if (PyUnicode_IS_ASCII(value) &&
strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value))
{
return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
(int)size);
}
Expand All @@ -1033,9 +1035,6 @@ AsObj(PyObject *value)
"surrogatepass", NATIVE_BYTEORDER);
else
Py_UNREACHABLE();
#else
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
#endif
if (!encoded) {
return NULL;
}
Expand All @@ -1045,12 +1044,39 @@ AsObj(PyObject *value)
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
#if USE_TCL_UNICODE
result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
(int)(size / sizeof(Tcl_UniChar)));
#else
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
if (!encoded) {
return NULL;
}
size = PyBytes_GET_SIZE(encoded);
if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) {
/* The string contains embedded null characters.
* Tcl needs a null character to be represented as \xc0\x80 in
* the Modified UTF-8 encoding. Otherwise the string can be
* truncated in some internal operations.
*
* NOTE: stringlib_replace() could be used here, but optimizing
* this obscure case isn't worth it unless stringlib_replace()
* was already exposed in the C API for other reasons. */
Py_SETREF(encoded,
PyObject_CallMethod(encoded, "replace", "y#y#",
"\0", (Py_ssize_t)1,
"\xc0\x80", (Py_ssize_t)2));
if (!encoded) {
return NULL;
}
size = PyBytes_GET_SIZE(encoded);
}
if (size > INT_MAX) {
Py_DECREF(encoded);
PyErr_SetString(PyExc_OverflowError, "string is too long");
return NULL;
}
result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
#endif
#endif /* USE_TCL_UNICODE */
Py_DECREF(encoded);
return result;
}
Expand Down
Loading
0