[3.13] gh-119614: Fix truncation of strings with embedded null charac… · python/cpython@732c005 · GitHub
[go: up one dir, main page]

Skip to content

Commit 732c005

Browse files
[3.13] gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909) (GH-120938)
Now the null character is always represented as \xc0\x80 for Tcl_NewStringObj(). (cherry picked from commit c38e2f6) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 206028d commit 732c005

File tree

4 files changed

+68
-7
lines changed

4 files changed

+68
-7
lines changed

Lib/test/test_tcl.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,18 @@ def testCall(self):
7373
tcl.call('set','a','1')
7474
self.assertEqual(tcl.call('set','a'),'1')
7575

76+
def test_call_passing_null(self):
77+
tcl = self.interp
78+
tcl.call('set', 'a', 'a\0b') # ASCII-only
79+
self.assertEqual(tcl.getvar('a'), 'a\x00b')
80+
self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
81+
self.assertEqual(tcl.eval('set a'), 'a\x00b')
82+
83+
tcl.call('set', 'a', '\u20ac\0') # non-ASCII
84+
self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
85+
self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
86+
self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
87+
7688
def testCallException(self):
7789
tcl = self.interp
7890
self.assertRaises(TclError,tcl.call,'set','a')
@@ -98,6 +110,18 @@ def testSetVar(self):
98110
tcl.setvar('a','1')
99111
self.assertEqual(tcl.eval('set a'),'1')
100112

113+
def test_setvar_passing_null(self):
114+
tcl = self.interp
115+
tcl.setvar('a', 'a\0b') # ASCII-only
116+
self.assertEqual(tcl.getvar('a'), 'a\x00b')
117+
self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
118+
self.assertEqual(tcl.eval('set a'), 'a\x00b')
119+
120+
tcl.setvar('a', '\u20ac\0') # non-ASCII
121+
self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
122+
self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
123+
self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
124+
101125
def testSetVarArray(self):
102126
tcl = self.interp
103127
tcl.setvar('a(1)','1')

Lib/test/test_tkinter/test_misc.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,15 @@ def test_info_patchlevel(self):
476476
self.assertEqual(vi.micro, 0)
477477
self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))
478478

479+
def test_embedded_null(self):
480+
widget = tkinter.Entry(self.root)
481+
widget.insert(0, 'abc\0def') # ASCII-only
482+
widget.selection_range(0, 'end')
483+
self.assertEqual(widget.selection_get(), 'abc\x00def')
484+
widget.insert(0, '\u20ac\0') # non-ASCII
485+
widget.selection_range(0, 'end')
486+
self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def')
487+
479488

480489
class WmTest(AbstractTkTest, unittest.TestCase):
481490

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix truncation of strings with embedded null characters in some internal
2+
operations in :mod:`tkinter`.

Modules/_tkinter.c

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
512512
else
513513
Py_UNREACHABLE();
514514
}
515-
#endif
515+
#endif /* USE_TCL_UNICODE */
516516
const char *s = Tcl_GetStringFromObj(value, &len);
517517
return unicodeFromTclStringAndSize(s, len);
518518
}
@@ -1018,7 +1018,9 @@ AsObj(PyObject *value)
10181018
PyErr_SetString(PyExc_OverflowError, "string is too long");
10191019
return NULL;
10201020
}
1021-
if (PyUnicode_IS_ASCII(value)) {
1021+
if (PyUnicode_IS_ASCII(value) &&
1022+
strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value))
1023+
{
10221024
return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
10231025
(int)size);
10241026
}
@@ -1033,9 +1035,6 @@ AsObj(PyObject *value)
10331035
"surrogatepass", NATIVE_BYTEORDER);
10341036
else
10351037
Py_UNREACHABLE();
1036-
#else
1037-
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
1038-
#endif
10391038
if (!encoded) {
10401039
return NULL;
10411040
}
@@ -1045,12 +1044,39 @@ AsObj(PyObject *value)
10451044
PyErr_SetString(PyExc_OverflowError, "string is too long");
10461045
return NULL;
10471046
}
1048-
#if USE_TCL_UNICODE
10491047
result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
10501048
(int)(size / sizeof(Tcl_UniChar)));
10511049
#else
1050+
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
1051+
if (!encoded) {
1052+
return NULL;
1053+
}
1054+
size = PyBytes_GET_SIZE(encoded);
1055+
if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) {
1056+
/* The string contains embedded null characters.
1057+
* Tcl needs a null character to be represented as \xc0\x80 in
1058+
* the Modified UTF-8 encoding. Otherwise the string can be
1059+
* truncated in some internal operations.
1060+
*
1061+
* NOTE: stringlib_replace() could be used here, but optimizing
1062+
* this obscure case isn't worth it unless stringlib_replace()
1063+
* was already exposed in the C API for other reasons. */
1064+
Py_SETREF(encoded,
1065+
PyObject_CallMethod(encoded, "replace", "y#y#",
1066+
"\0", (Py_ssize_t)1,
1067+
"\xc0\x80", (Py_ssize_t)2));
1068+
if (!encoded) {
1069+
return NULL;
1070+
}
1071+
size = PyBytes_GET_SIZE(encoded);
1072+
}
1073+
if (size > INT_MAX) {
1074+
Py_DECREF(encoded);
1075+
PyErr_SetString(PyExc_OverflowError, "string is too long");
1076+
return NULL;
1077+
}
10521078
result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
1053-
#endif
1079+
#endif /* USE_TCL_UNICODE */
10541080
Py_DECREF(encoded);
10551081
return result;
10561082
}

0 commit comments

Comments
 (0)
0