bpo-45061: Detect refcount bug on empty string singleton (GH-28504) · python/cpython@86f2837 · GitHub
[go: up one dir, main page]

Skip to content

Commit 86f2837

Browse files
authored
bpo-45061: Detect refcount bug on empty string singleton (GH-28504)
Detect refcount bugs in C extensions when the empty Unicode string singleton is destroyed by mistake. * Move forward declarations to the top of unicodeobject.c. * Simplify unicode_is_singleton().
1 parent 06e1773 commit 86f2837

File tree

2 files changed

+40
-20
lines changed

2 files changed

+40
-20
lines changed
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Add a deallocator to the bool type to detect refcount bugs in C extensions
22
which call Py_DECREF(Py_True) or Py_DECREF(Py_False) by mistake. Detect also
3-
refcount bugs when the empty tuple singleton is destroyed by mistake. Patch
4-
by Victor Stinner.
3+
refcount bugs when the empty tuple singleton or the Unicode empty string
4+
singleton is destroyed by mistake.
5+
Patch by Victor Stinner.

Objects/unicodeobject.c

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4848
#include "pycore_interp.h" // PyInterpreterState.fs_codec
4949
#include "pycore_object.h" // _PyObject_GC_TRACK()
5050
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
51+
#include "pycore_pyerrors.h" // _Py_FatalRefcountError()
5152
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
5253
#include "pycore_pystate.h" // _PyInterpreterState_GET()
5354
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
@@ -212,6 +213,24 @@ extern "C" {
212213
#endif
213214

214215

216+
/* Forward declaration */
217+
static inline int
218+
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
219+
static inline void
220+
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
221+
static PyObject *
222+
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
223+
const char *errors);
224+
static PyObject *
225+
unicode_decode_utf8(const char *s, Py_ssize_t size,
226+
_Py_error_handler error_handler, const char *errors,
227+
Py_ssize_t *consumed);
228+
#ifdef Py_DEBUG
229+
static inline int unicode_is_finalizing(void);
230+
static int unicode_is_singleton(PyObject *unicode);
231+
#endif
232+
233+
215234
static struct _Py_unicode_state*
216235
get_unicode_state(void)
217236
{
@@ -279,19 +298,6 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
279298
}
280299

281300

282-
/* Forward declaration */
283-
static inline int
284-
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
285-
static inline void
286-
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
287-
static PyObject *
288-
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
289-
const char *errors);
290-
static PyObject *
291-
unicode_decode_utf8(const char *s, Py_ssize_t size,
292-
_Py_error_handler error_handler, const char *errors,
293-
Py_ssize_t *consumed);
294-
295301
/* Fast detection of the most frequent whitespace characters */
296302
const unsigned char _Py_ascii_whitespace[] = {
297303
0, 0, 0, 0, 0, 0, 0, 0,
@@ -1930,6 +1936,12 @@ _PyUnicode_Ready(PyObject *unicode)
19301936
static void
19311937
unicode_dealloc(PyObject *unicode)
19321938
{
1939+
#ifdef Py_DEBUG
1940+
if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) {
1941+
_Py_FatalRefcountError("deallocating an Unicode singleton");
1942+
}
1943+
#endif
1944+
19331945
switch (PyUnicode_CHECK_INTERNED(unicode)) {
19341946
case SSTATE_NOT_INTERNED:
19351947
break;
@@ -1982,11 +1994,8 @@ unicode_is_singleton(PyObject *unicode)
19821994
if (unicode == state->empty_string) {
19831995
return 1;
19841996
}
1985-
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
1986-
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
1987-
{
1988-
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
1989-
if (ch < 256 && state->latin1[ch] == unicode) {
1997+
for (Py_ssize_t i = 0; i < 256; i++) {
1998+
if (unicode == state->latin1[i]) {
19901999
return 1;
19912000
}
19922001
}
@@ -15984,6 +15993,16 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1598415993
#endif
1598515994

1598615995

15996+
#ifdef Py_DEBUG
15997+
static inline int
15998+
unicode_is_finalizing(void)
15999+
{
16000+
struct _Py_unicode_state *state = get_unicode_state();
16001+
return (state->interned == NULL);
16002+
}
16003+
#endif
16004+
16005+
1598716006
void
1598816007
_PyUnicode_Fini(PyInterpreterState *interp)
1598916008
{

0 commit comments

Comments
 (0)
0