8000 GH-100240: Generic freelist, applied to ints by iritkatriel · Pull Request #101453 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-100240: Generic freelist, applied to ints #101453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ extern "C" {
#include "pycore_dict_state.h" // struct _Py_dict_state
#include "pycore_exceptions.h" // struct _Py_exc_state
#include "pycore_floatobject.h" // struct _Py_float_state
#include "pycore_pymem.h" // free lists
#include "pycore_function.h" // FUNC_MAX_WATCHERS
#include "pycore_genobject.h" // struct _Py_async_gen_state
#include "pycore_gc.h" // struct _gc_runtime_state
Expand Down Expand Up @@ -50,6 +51,9 @@ struct _Py_long_state {

/* interpreter state */

#define SMALL_OBJECT_FREELIST_SIZE 1024
#define INTERP_NUM_FREELISTS 30

/* PyInterpreterState holds the global state for one of the runtime's
interpreters. Typically the initial (main) interpreter is the only one.

Expand Down Expand Up @@ -178,6 +182,9 @@ struct _is {
struct _Py_context_state context;
struct _Py_exc_state exc_state;

#if WITH_FREELISTS
_PyFreeList freelists[INTERP_NUM_FREELISTS];
#endif
struct ast_state ast;
struct types_state types;
struct callable_cache callable_cache;
Expand Down Expand Up @@ -230,6 +237,34 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *);
PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *);
PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *);

#define SIZE_TO_FREELIST_INDEX(size) ((size-4)/2)
Copy link
Member
@markshannon markshannon Feb 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why divide by 2 and not 2 * sizeof(void *), which is the quantum of allocation for malloc and ob_malloc?
(Assuming that the size is in bytes)
Could you use the term "size class" or equivalent, instead of "freelist index" The mapping here is from sizes to classes (not all size classes get free lists).

Any reason not to make this an inline function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, put an assert in this function, and put a size check in the caller.

#define FREELIST_INDEX_TO_SIZE(idx) (2*(idx) + 4)

static inline PyObject*
_PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) {
#if WITH_FREELISTS
int index = SIZE_TO_FREELIST_INDEX(size);
assert(index >= 0 && index < INTERP_NUM_FREELISTS);
return _PyFreeList_Alloc(&interp->freelists[index]);
#else
return PyObject_Malloc(size);
#endif
}

static inline void
_PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, Py_ssize_t size) {
#if WITH_FREELISTS
/* todo: assert the size is correct? */
int index = SIZE_TO_FREELIST_INDEX(size);
assert(index >= 0 && index < INTERP_NUM_FREELISTS);
_PyFreeList_Alloc(&interp->freelists[index]);
#else
Py_TYPE(op)->tp_free((PyObject *)op);
#endif
}



#ifdef __cplusplus
}
#endif
Expand Down
61 changes: 61 additions & 0 deletions Include/internal/pycore_pymem.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,67 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName(
PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator);


#if WITH_FREELISTS
/* Free lists.
*
* Free lists have a pointer to their first entry and
* the amount of space available allowing fast checks
* for emptiness and fullness.
* When empty they are half filled and when full they are
* completely emptied. This helps the underlying allocator
* avoid fragmentation and helps performance.
*/

typedef struct _freelist {
void *ptr;
uint32_t space;
uint16_t size;
uint16_t capacity;
} _PyFreeList;

extern void *_PyFreeList_HalfFillAndAllocate(_PyFreeList *list);
extern void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr);


static inline void *
_PyFreeList_Alloc(_PyFreeList *list) {
if (list->ptr != NULL) {
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.from_freelist++;
#endif
void *result = list->ptr;
list->ptr = *((void **)result);
list->space++;
return result;
}
return _PyFreeList_HalfFillAndAllocate(list);
}

static inline void
_PyFreeList_Free(_PyFreeList *list, void *ptr) {
if (list->space) {
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.to_freelist++;
#endif
*((void **)ptr) = list->ptr;
list->ptr = ptr;
list->space--;
return;
}
_PyFreeList_FreeToFull(list, ptr);
}

static inline void
_PyFreeList_Init(_PyFreeList *list, int size, int capacity)
{
list->ptr = NULL;
list->space = list->capacity = capacity;
list->size = size;
}

extern void _PyFreeList_Clear(_PyFreeList *list);
#endif /* WITH_FREELISTS */

#ifdef __cplusplus
}
#endif
Expand Down
48 changes: 33 additions & 15 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "pycore_bitutils.h" // _Py_popcount32()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_long.h" // _Py_SmallInts
#include "pycore_pymem.h" // Free lists
#include "pycore_object.h" // _PyObject_InitVar()
#include "pycore_pystate.h" // _Py_IsMainInterpreter()
#include "pycore_runtime.h" // _PY_NSMALLPOSINTS
Expand Down Expand Up @@ -152,16 +153,20 @@ _PyLong_New(Py_ssize_t size)
"too many digits in integer");
return NULL;
}
/* Fast operations for single digit integers (including zero)
* assume that there is always at least one digit present. */
Py_ssize_t ndigits = size ? size : 1;
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof(PyVarObject) instead of the offsetof, but this risks being
incorrect in the presence of padding between the PyVarObject header
and the digits. */
result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) +
ndigits*sizeof(digit));
assert(size >= 0);
if (size <= 1) {
PyInterpreterState *interp = _PyInterpreterState_GET();
result = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject));
}
else {
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof(PyVarObject) instead of the offsetof, but this risks being
incorrect in the presence of padding between the PyVarObject header
and the digits. */
result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) +
size*sizeof(digit));
}
if (!result) {
PyErr_NoMemory();
return NULL;
Expand Down Expand Up @@ -201,11 +206,11 @@ _PyLong_FromMedium(sdigit x)
{
assert(!IS_SMALL_INT(x));
assert(is_medium_int(x));
/* We could use a freelist here */
PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject));
PyInterpreterState *interp = _PyInterpreterState_GET();
PyLongObject *v = (PyLongObject *)_PyInterpreterState_FreelistAlloc(
interp, sizeof(PyLongObject));
if (v == NULL) {
PyErr_NoMemory();
return NULL;
return PyErr_NoMemory();
}
Py_ssize_t sign = x < 0 ? -1: 1;
digit abs_x = x < 0 ? -x : x;
Expand Down Expand Up @@ -267,6 +272,19 @@ _PyLong_FromSTwoDigits(stwodigits x)
return _PyLong_FromLarge(x);
}

static void
int_dealloc(PyLongObject *op)
{
if (PyLong_CheckExact(op) && IS_MEDIUM_VALUE(op)) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyInterpreterState_FreelistFree(interp, (PyObject*)op, sizeof(PyLongObject));
}
else
{
Py_TYPE(op)->tp_free((PyObject *)op);
}
}

int
_PyLong_AssignValue(PyObject **target, Py_ssize_t value)
{
Expand Down Expand Up @@ -6289,7 +6307,7 @@ PyTypeObject PyLong_Type = {
"int", /* tp_name */
offsetof(PyLongObject, long_value.ob_digit), /* tp_basicsize */
sizeof(digit), /* tp_itemsize */
0, /* tp_dealloc */
(destructor)int_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
Expand Down
63 changes: 63 additions & 0 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,69 @@ PyObject_Free(void *ptr)
# define LIKELY(value) (value)
#endif


#if WITH_FREELISTS
void *
_PyFreeList_HalfFillAndAllocate(_PyFreeList *list)
{
assert(list->ptr == NULL);
if (list->capacity < 4) {
return PyObject_Malloc(list->size);
}
uint32_t i = 0;
for (; i < list->space>>1; i++) {
void* ptr = PyObject_Malloc(list->size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for future PR: we should add the bulk allocate/free capability to the allocator.

if (ptr == NULL) {
break;
}
*((void**)ptr) = list->ptr;
list->ptr = ptr;
}
if (i == 0) {
return NULL;
}
void *result = list->ptr;
list->ptr = *((void **)result);
list->space -= (i-1);
return result;
}

void
_PyFreeList_Clear(_PyFreeList *list)
{
int space = 0;
void *head = list->ptr;
while (head) {
void *next = *((void**)head);
PyObject_Free(head);
head = next;
space++;
}
list->ptr = NULL;
list->space += space;
}

void
_PyFreeList_FreeToFull(_PyFreeList *list, void *ptr)
{
assert(list->space == 0);
PyObject_Free(ptr);
if (list->ptr == NULL) {
return;
}
int space = 0;
void *head = list->ptr;
while (head) {
void *next = *((void**)head);
PyObject_Free(head);
head = next;
space++;
}
list->ptr = ptr;
list->space = space;
}
#endif /* WITH_FREELISTS */

#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
Expand Down
14 changes: 14 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,14 @@ PyInterpreterState_New(void)

init_interpreter(interp, runtime, id, old_head, pending_lock);

#if WITH_FREELISTS
for (int i=0; i < INTERP_NUM_FREELISTS; i++) {
_PyFreeList_Init(&interp->freelists[i],
FREELIST_INDEX_TO_SIZE(i),
SMALL_OBJECT_FREELIST_SIZE);
}
#endif

HEAD_UNLOCK(runtime);
return interp;

Expand Down Expand Up @@ -681,6 +689,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
}
HEAD_UNLOCK(runtime);

#if WITH_FREELISTS
for (int i=0; i < INTERP_NUM_FREELISTS; i++) {
_PyFreeList_Clear(&interp->freelists[i]);
}
#endif

Py_CLEAR(interp->audit_hooks);

PyConfig_Clear(&interp->config);
Expand Down
0