8000 [WIP] bpo-39465: _PyUnicode_FromId() now uses an hash table by vstinner · Pull Request #20048 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[WIP] bpo-39465: _PyUnicode_FromId() now uses an hash table #20048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
bpo-39465: _PyUnicode_FromId() now uses an hash table
Rewrote the _Py_Identifier structure and the _PyUnicode_FromId() function to
store Python objects in a hash table rather than a singly linked
list.

Add _PyUnicode_PreInit() to create the hash table: it must be called
before the first PyType_Ready() call.
  • Loading branch information
vstinner committed May 13, 2020
commit 2b0acbbd90b3ea1d80d8ec3d3e71f4ad3a111039
4 changes: 1 addition & 3 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,10 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
_PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
*/
/* A static identifier: a C string that _PyUnicode_FromId() turns into a
   Python string object. Instances are normally declared with
   _Py_static_string() or _Py_IDENTIFIER(). */
typedef struct _Py_Identifier {
/* Pointer to another identifier; NULL when statically initialized. */
struct _Py_Identifier *next;
/* NUL-terminated text; _PyUnicode_FromId() decodes it as UTF-8. */
const char* string;
/* Python object slot; NULL when statically initialized. */
PyObject *object;
} _Py_Identifier;

/* Static initializer for a _Py_Identifier. Only .string is named; every
   other member of the structure is implicitly zero-initialized. */
#define _Py_static_string_init(value) { .string = (value) }
/* Define a file-local _Py_Identifier named `varname` holding `value`. */
#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value)
/* Define PyId_<varname>, an identifier whose text is the name itself. */
#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)

Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);

/* Various one-time initializers */

/* Creates (in the main interpreter) the hash table used by
   _PyUnicode_FromId(); must be called before the first PyType_Ready()
   call. */
extern PyStatus _PyUnicode_PreInit(PyThreadState *tstate);
extern PyStatus _PyUnicode_Init(void);
extern int _PyStructSequence_Init(void);
extern int _PyLong_Init(PyThreadState *tstate);
Expand Down
67 changes: 48 additions & 19 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_bytes_methods.h"
#include "pycore_fileutils.h"
#include "pycore_hashtable.h" // _Py_hashtable_new()
#include "pycore_initconfig.h"
#include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_object.h"
Expand Down Expand Up @@ -286,7 +287,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
Py_ssize_t *consumed);

/* Hash table of static strings: maps the address of a _Py_Identifier to its
   Python string object. Created by _PyUnicode_PreInit() and destroyed by
   _PyUnicode_ClearStaticStrings(). */
static _Py_hashtable_t *static_strings = NULL;

/* bpo-40521: Latin1 singletons are shared by all interpreters. */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
Expand Down Expand Up @@ -2275,31 +2276,43 @@ PyUnicode_FromString(const char *u)
/* Return the interned Python string for the static identifier `id`.

   Strings are cached in the static_strings hash table, keyed by the address
   of the _Py_Identifier. On a cache miss, id->string is decoded as UTF-8,
   interned, and stored in the table, which then owns the strong reference.

   Returns a borrowed reference, or NULL on failure. The table must already
   exist: _PyUnicode_PreInit() has to run before the first call. */
PyObject *
_PyUnicode_FromId(_Py_Identifier *id)
{
    assert(static_strings != NULL);

    PyObject *object = _Py_hashtable_get(static_strings, id);
    if (object) {
        // Return a borrowed reference
        return object;
    }

    object = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string),
                                          NULL, NULL);
    if (object == NULL) {
        return NULL;
    }
    PyUnicode_InternInPlace(&object);

    // Hand the strong reference over to the table
    if (_Py_hashtable_set(static_strings, id, object) < 0) {
        // The table did not take ownership: release our reference so the
        // freshly created string is not leaked.
        Py_DECREF(object);
        PyErr_NoMemory();
        return NULL;
    }

    // Return a borrowed reference
    return object;
}

static void
static_strings_decref(void *data)
{
PyObject *object = (PyObject *)data;
Py_DECREF(object);
}

/* Destroy the hash table of static strings, if it exists.

   The stored objects are released through the value destructor given to the
   table when it was created. The pointer is reset to NULL afterwards, so
   calling this function again is a no-op. */
void
_PyUnicode_ClearStaticStrings(void)
{
    if (static_strings == NULL) {
        return;
    }

    _Py_hashtable_destroy(static_strings);
    static_strings = NULL;
}

/* Internal function, doesn't check maximum character */
Expand Down Expand Up @@ -15509,6 +15522,22 @@ PyTypeObject PyUnicode_Type = {

/* Initialize the Unicode implementation */

/* Create the static_strings hash table.

   Only the main interpreter creates the table; for any other interpreter
   this is a no-op that reports success. Keys are hashed and compared by
   pointer value, and stored values are released with
   static_strings_decref().

   Returns a no-memory status if the table cannot be allocated. */
PyStatus
_PyUnicode_PreInit(PyThreadState *tstate)
{
    if (!_Py_IsMainInterpreter(tstate)) {
        return _PyStatus_OK();
    }

    static_strings = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
                                            _Py_hashtable_compare_direct,
                                            NULL, static_strings_decref,
                                            NULL);
    if (static_strings == NULL) {
        return _PyStatus_NO_MEMORY();
    }

    return _PyStatus_OK();
}


PyStatus
_PyUnicode_Init(void)
{
Expand Down
5 changes: 5 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,11 @@ pycore_init_types(PyThreadState *tstate)
return status;
}

status = _PyUnicode_PreInit(tstate);
if (_PyStatus_EXCEPTION(status)) {
return status;
}

if (is_main_interp) {
status = _PyTypes_Init();
if (_PyStatus_EXCEPTION(status)) {
Expand Down
0