gh-100227: Move the Dict of Interned Strings to PyInterpreterState by ericsnowcurrently · Pull Request #102339 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-100227: Move the Dict of Interned Strings to PyInterpreterState #102339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Move the interned dict to PyInterpreterState.
  • Loading branch information
ericsnowcurrently committed Feb 28, 2023
commit 23e2211c679b8cfad23c4c61ad82b00e5a53d8d8
9 changes: 2 additions & 7 deletions Include/internal/pycore_global_objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@ extern "C" {
// Only immutable objects should be considered runtime-global.
// All others must be per-interpreter.

#define _Py_CACHED_OBJECT(NAME) \
_PyRuntime.cached_objects.NAME

struct _Py_cached_objects {
PyObject *interned_strings;
};

#define _Py_GLOBAL_OBJECT(NAME) \
_PyRuntime.static_objects.NAME
#define _Py_SINGLETON(NAME) \
Expand Down Expand Up @@ -65,6 +58,8 @@ struct _Py_static_objects {
(interp)->cached_objects.NAME

struct _Py_interp_cached_objects {
PyObject *interned_strings;

/* AST */
PyObject *str_replace_inf;

Expand Down
1 change: 0 additions & 1 deletion Include/internal/pycore_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ typedef struct pyruntimestate {
} types;

/* All the objects that are shared by the runtime's interpreters. */
struct _Py_cached_objects cached_objects;
struct _Py_static_objects static_objects;

/* The following fields are here to avoid allocation during init.
Expand Down
50 changes: 23 additions & 27 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);
#ifdef Py_DEBUG
static inline int unicode_is_finalizing(void);
static inline int unicode_is_finalizing(PyInterpreterState *);
static int unicode_is_singleton(PyObject *unicode);
#endif

Expand All @@ -231,33 +231,33 @@ static inline PyObject* unicode_new_empty(void)
Another way to look at this is to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
static inline PyObject *get_interned_dict(void)
static inline PyObject *get_interned_dict(PyInterpreterState *interp)
{
return _Py_CACHED_OBJECT(interned_strings);
return _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
}

static PyObject *
ensure_interned_dict()
ensure_interned_dict(PyInterpreterState *interp)
{
PyObject *interned = get_interned_dict();
PyObject *interned = get_interned_dict(interp);
if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
return NULL;
}
_Py_CACHED_OBJECT(interned_strings) = interned;
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
}
return interned;
}

static void
clear_interned_dict(void)
clear_interned_dict(PyInterpreterState *interp)
{
PyObject *interned = get_interned_dict();
PyObject *interned = get_interned_dict(interp);
if (interned != NULL) {
PyDict_Clear(interned);
Py_DECREF(interned);
_Py_CACHED_OBJECT(interned_strings) = NULL;
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
}
}

Expand Down Expand Up @@ -1543,8 +1543,9 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
static void
unicode_dealloc(PyObject *unicode)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
#ifdef Py_DEBUG
if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) {
if (!unicode_is_finalizing(interp) && unicode_is_singleton(unicode)) {
_Py_FatalRefcountError("deallocating an Unicode singleton");
}
#endif
Expand All @@ -1556,7 +1557,7 @@ unicode_dealloc(PyObject *unicode)
PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3);
PyObject *interned = get_interned_dict();
PyObject *interned = get_interned_dict(interp);
assert(interned != NULL);
if (PyDict_DelItem(interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
Expand Down Expand Up @@ -14623,7 +14624,8 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}

PyObject *interned = ensure_interned_dict();
PyInterpreterState *interp = _PyInterpreterState_GET();
PyObject *interned = ensure_interned_dict(interp);
if (interned == NULL) {
PyErr_Clear(); /* Don't leave an exception */
return;
Expand Down Expand Up @@ -14671,12 +14673,7 @@ PyUnicode_InternFromString(const char *cp)
void
_PyUnicode_ClearInterned(PyInterpreterState *interp)
{
if (!_Py_IsMainInterpreter(interp)) {
// interned dict is shared by all interpreters
return;
}

PyObject *interned = get_interned_dict();
PyObject *interned = get_interned_dict(interp);
if (interned == NULL) {
return;
}
Expand Down Expand Up @@ -14711,7 +14708,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
total_length);
#endif

clear_interned_dict();
clear_interned_dict(interp);
}


Expand Down Expand Up @@ -15122,9 +15119,9 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)

#ifdef Py_DEBUG
static inline int
unicode_is_finalizing(void)
unicode_is_finalizing(PyInterpreterState *interp)
{
return (get_interned_dict() == NULL);
return (get_interned_dict(interp) == NULL);
}
#endif

Expand All @@ -15147,14 +15144,13 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;

if (_Py_IsMainInterpreter(interp)) {
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(get_interned_dict() == NULL);
// bpo-47182: force a unicodedata CAPI capsule re-import on
// subsequent initialization of main interpreter.
}
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(get_interned_dict(interp) == NULL);

_PyUnicode_FiniEncodings(&state->fs_codec);

// bpo-47182: force a unicodedata CAPI capsule re-import on
// subsequent initialization of interpreter.
interp->unicode.ucnhash_capi = NULL;

unicode_clear_identifiers(state);
Expand Down
0