gh-128137: Split out interned field from state · python/cpython@45536d0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 45536d0

Browse files
committed
gh-128137: Split out interned field from state
1 parent 180d417 commit 45536d0

File tree

2 files changed

+30
-26
lines changed

2 files changed

+30
-26
lines changed

Include/cpython/unicodeobject.h

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,17 @@ typedef struct {
9999
PyObject_HEAD
100100
Py_ssize_t length; /* Number of code points in the string */
101101
Py_hash_t hash; /* Hash value; -1 if not set */
102+
/* If interned is non-zero, the two references from the
103+
dictionary to this object are *not* counted in ob_refcnt.
104+
The possible values here are:
105+
0: Not Interned
106+
1: Interned
107+
2: Interned and Immortal
108+
3: Interned, Immortal, and Static
109+
This categorization allows the runtime to determine the right
110+
cleanup mechanism at runtime shutdown. */
111+
uint16_t interned;
102112
struct {
103-
/* If interned is non-zero, the two references from the
104-
dictionary to this object are *not* counted in ob_refcnt.
105-
The possible values here are:
106-
0: Not Interned
107-
1: Interned
108-
2: Interned and Immortal
109-
3: Interned, Immortal, and Static
110-
This categorization allows the runtime to determine the right
111-
cleanup mechanism at runtime shutdown. */
112-
unsigned int interned:2;
113113
/* Character size:
114114
115115
- PyUnicode_1BYTE_KIND (1):
@@ -132,21 +132,21 @@ typedef struct {
132132
* all characters are in the range U+0000-U+10FFFF
133133
* at least one character is in the range U+10000-U+10FFFF
134134
*/
135-
unsigned int kind:3;
135+
uint16_t kind:3;
136136
/* Compact is with respect to the allocation scheme. Compact unicode
137137
objects only require one memory block while non-compact objects use
138138
one block for the PyUnicodeObject struct and another for its data
139139
buffer. */
140-
unsigned int compact:1;
140+
uint16_t compact:1;
141141
/* The string only contains characters in the range U+0000-U+007F (ASCII)
142142
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
143143
set, use the PyASCIIObject structure. */
144-
unsigned int ascii:1;
144+
uint16_t ascii:1;
145145
/* The object is statically allocated. */
146-
unsigned int statically_allocated:1;
146+
uint16_t statically_allocated:1;
147147
/* Padding to ensure that PyUnicode_DATA() is always aligned to
148148
4 bytes (see issue #19537 on m68k). */
149-
unsigned int :24;
149+
uint16_t :10;
150150
} state;
151151
} PyASCIIObject;
152152

@@ -195,7 +195,11 @@ typedef struct {
195195

196196
/* Use only if you know it's a string */
197197
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
198-
return _PyASCIIObject_CAST(op)->state.interned;
198+
#ifdef Py_GIL_DISABLED
199+
return _Py_atomic_load_uint16_relaxed(&(_PyASCIIObject_CAST(op)->interned));
200+
#else
201+
return _PyASCIIObject_CAST(op)->interned;
202+
#endif
199203
}
200204
#define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))
201205

Objects/unicodeobject.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,7 +1409,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
14091409
data = unicode + 1;
14101410
_PyUnicode_LENGTH(unicode) = size;
14111411
_PyUnicode_HASH(unicode) = -1;
1412-
_PyUnicode_STATE(unicode).interned = 0;
1412+
_PyASCIIObject_CAST(unicode)->interned = 0;
14131413
_PyUnicode_STATE(unicode).kind = kind;
14141414
_PyUnicode_STATE(unicode).compact = 1;
14151415
_PyUnicode_STATE(unicode).ascii = is_ascii;
@@ -1711,7 +1711,7 @@ unicode_dealloc(PyObject *unicode)
17111711
_Py_SetImmortal(unicode);
17121712
return;
17131713
}
1714-
switch (_PyUnicode_STATE(unicode).interned) {
1714+
switch (_PyASCIIObject_CAST(unicode)->interned) {
17151715
case SSTATE_NOT_INTERNED:
17161716
break;
17171717
case SSTATE_INTERNED_MORTAL:
@@ -1739,7 +1739,7 @@ unicode_dealloc(PyObject *unicode)
17391739
// so it can't cause trouble (except wasted memory)
17401740
// - if it wasn't popped, it'll remain interned
17411741
_Py_SetImmortal(unicode);
1742-
_PyUnicode_STATE(unicode).interned = SSTATE_INTERNED_IMMORTAL;
1742+
_PyASCIIObject_CAST(unicode)->interned = SSTATE_INTERNED_IMMORTAL;
17431743
return;
17441744
}
17451745
if (r == 0) {
@@ -15470,7 +15470,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
1547015470
#else
1547115471
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
1547215472
#endif
15473-
_PyUnicode_STATE(self).interned = 0;
15473+
_PyASCIIObject_CAST(self)->interned = 0;
1547415474
_PyUnicode_STATE(self).kind = kind;
1547515475
_PyUnicode_STATE(self).compact = 0;
1547615476
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
@@ -15689,7 +15689,7 @@ intern_static(PyInterpreterState *interp, PyObject *s /* stolen */)
1568915689
assert(r == NULL);
1569015690
/* but just in case (for the non-debug build), handle this */
1569115691
if (r != NULL && r != s) {
15692-
assert(_PyUnicode_STATE(r).interned == SSTATE_INTERNED_IMMORTAL_STATIC);
15692+
assert(_PyASCIIObject_CAST(r)->interned == SSTATE_INTERNED_IMMORTAL_STATIC);
1569315693
assert(_PyUnicode_CHECK(r));
1569415694
Py_DECREF(s);
1569515695
return Py_NewRef(r);
@@ -15699,7 +15699,7 @@ intern_static(PyInterpreterState *interp, PyObject *s /* stolen */)
1569915699
Py_FatalError("failed to intern static string");
1570015700
}
1570115701

15702-
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
15702+
_PyASCIIObject_CAST(s)->interned = SSTATE_INTERNED_IMMORTAL_STATIC;
1570315703
return s;
1570415704
}
1570515705

@@ -15726,7 +15726,7 @@ immortalize_interned(PyObject *s)
1572615726
_Py_DecRefTotal(_PyThreadState_GET());
1572715727
}
1572815728
#endif
15729-
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
15729+
FT_ATOMIC_STORE_UINT16_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_INTERNED_IMMORTAL);
1573015730
_Py_SetImmortal(s);
1573115731
}
1573215732

@@ -15833,7 +15833,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1583315833

1583415834
/* NOT_INTERNED -> INTERNED_MORTAL */
1583515835

15836-
assert(_PyUnicode_STATE(s).interned == SSTATE_NOT_INTERNED);
15836+
assert(_PyASCIIObject_CAST(s)->interned == SSTATE_NOT_INTERNED);
1583715837

1583815838
if (!_Py_IsImmortal(s)) {
1583915839
/* The two references in interned dict (key and value) are not counted.
@@ -15845,7 +15845,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1584515845
_Py_DecRefTotal(_PyThreadState_GET());
1584615846
#endif
1584715847
}
15848-
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15848+
FT_ATOMIC_STORE_UINT16_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_INTERNED_MORTAL);
1584915849

1585015850
/* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */
1585115851

@@ -15981,7 +15981,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1598115981
Py_UNREACHABLE();
1598215982
}
1598315983
if (!shared) {
15984-
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
15984+
FT_ATOMIC_STORE_UINT16_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_NOT_INTERNED);
1598515985
}
1598615986
}
1598715987
#ifdef INTERNED_STATS

0 commit comments

Comments
 (0)
0