10000 GH-100240: Generic freelist, applied to ints by iritkatriel · Pull Request #101453 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-100240: Generic freelist, applied to ints #101453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ extern "C" {
#include "pycore_dict_state.h" // struct _Py_dict_state
#include "pycore_exceptions.h" // struct _Py_exc_state
#include "pycore_floatobject.h" // struct _Py_float_state
#include "pycore_pymem.h" // free lists
#include "pycore_function.h" // FUNC_MAX_WATCHERS
#include "pycore_genobject.h" // struct _Py_async_gen_state
#include "pycore_gc.h" // struct _gc_runtime_state
Expand Down Expand Up @@ -49,6 +50,10 @@ struct _Py_long_state {


/* interpreter state */
#define WITH_FREELISTS 1

#define SMALL_OBJECT_FREELIST_SIZE 1024
#define INTERP_NUM_FREELISTS 30

/* PyInterpreterState holds the global state for one of the runtime's
interpreters. Typically the initial (main) interpreter is the only one.
Expand Down Expand Up @@ -178,6 +183,9 @@ struct _is {
struct _Py_context_state context;
struct _Py_exc_state exc_state;

#if WITH_FREELISTS
_PyFreeList freelists[INTERP_NUM_FREELISTS];
#endif
struct ast_state ast;
struct types_state types;
struct callable_cache callable_cache;
Expand Down Expand Up @@ -230,6 +238,28 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *);
PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *);
PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *);


#if WITH_FREELISTS
static inline PyObject*
_PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, int size) {
assert(size >= 4);
assert((size & 0x1) == 0);
int index = (size-4)/2;
return _PyFreeList_Alloc(&interp->freelists[index]);
}

static inline void
_PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, int size) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is size?. The assumption is that it is size in bytes, but I think you are using size in machine words.
Also needs to be Py_ssize_t.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's the results of sizeof(PyLongObject), that should be bytes no?

/* todo: assert the size is correct? */
assert(size >= 4);
assert((size & 0x1) == 0);
int index = (size-4)/2;
_PyFreeList_Alloc(&interp->freelists[index]);
}

#endif /* WITH_FREELISTS */


#ifdef __cplusplus
}
#endif
Expand Down
62 changes: 62 additions & 0 deletions Include/internal/pycore_pymem.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,68 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName(
PYMEM_ALLOCATOR_NOT_SET does nothing. */
PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator);

#define WITH_FREELISTS 1

#if WITH_FREELISTS
/* Free lists.
*
* Free lists have a pointer to their first entry and
* the amount of space available allowing fast checks
* for emptiness and fullness.
* When empty they are half filled and when full they are
* completely emptied. This helps the underlying allocator
* avoid fragmentation and helps performance.
*/

typedef struct _freelist {
void *ptr;
uint32_t space;
uint16_t size;
uint16_t capacity;
} _PyFreeList;

extern void *_PyFreeList_HalfFillAndAllocate(_PyFreeList *list);
extern void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr);


static inline void *
_PyFreeList_Alloc(_PyFreeList *list) {
if (list->ptr != NULL) {
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.from_freelist++;
#endif
void *result = list->ptr;
list->ptr = *((void **)result);
list->space++;
return result;
}
return _PyFreeList_HalfFillAndAllocate(list);
}

static inline void
_PyFreeList_Free(_PyFreeList *list, void *ptr) {
if (list->space) {
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.to_freelist++;
#endif
*((void **)ptr) = list->ptr;
list->ptr = ptr;
list->space--;
return;
}
_PyFreeList_FreeToFull(list, ptr);
}

static inline void
_PyFreeList_Init(_PyFreeList *list, int size, int capacity)
{
list->ptr = NULL;
list->space = list->capacity = capacity;
list->size = size;
}

extern void _PyFreeList_Clear(_PyFreeList *list);
#endif /* WITH_FREELISTS */

#ifdef __cplusplus
}
Expand Down
56 changes: 43 additions & 13 deletions Objects/longobject.c
10000
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "pycore_bitutils.h" // _Py_popcount32()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_long.h" // _Py_SmallInts
#include "pycore_pymem.h" // Free lists
#include "pycore_object.h" // _PyObject_InitVar()
#include "pycore_pystate.h" // _Py_IsMainInterpreter()
#include "pycore_runtime.h" // _PY_NSMALLPOSINTS
Expand Down Expand Up @@ -152,16 +153,26 @@ _PyLong_New(Py_ssize_t size)
"too many digits in integer");
return NULL;
}
/* Fast operations for single digit integers (including zero)
* assume that there is always at least one digit present. */
Py_ssize_t ndigits = size ? size : 1;
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof(PyVarObject) instead of the offsetof, but this risks being
incorrect in the presence of padding between the PyVarObject header
and the digits. */
result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) +
ndigits*sizeof(digit));
assert(size >= 0);
#if WITH_FREELISTS
if (size <= 1) {
PyInterpreterState *interp = _PyInterpreterState_GET();
result = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject));
}
#else
if (size == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't change it for this PR, but this shouldn't be needed, as there should only be one zero. I think this indicates a bug in the caller.
The above assert, assert(size >= 0) should then be assert(size > 0)

result = PyObject_Malloc(sizeof(PyLongObject));
}
#endif
else {
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof(PyVarObject) instead of the offsetof, but this risks being
incorrect in the presence of padding between the PyVarObject header
and the digits. */
result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) +
size*sizeof(digit));
}
if (!result) {
PyErr_NoMemory();
return NULL;
Expand Down Expand Up @@ -202,10 +213,14 @@ _PyLong_FromMedium(sdigit x)
assert(!IS_SMALL_INT(x));
assert(is_medium_int(x));
/* We could use a freelist here */
#if WITH_FREELISTS
PyInterpreterState *interp = _PyInterpreterState_GET();
PyLongObject *v = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject));
#else
PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject));
#endif
if (v == NULL) {
PyErr_NoMemory();
return NULL;
return PyErr_NoMemory();
}
Py_ssize_t sign = x < 0 ? -1: 1;
digit abs_x = x < 0 ? -x : x;
Expand Down Expand Up @@ -267,6 +282,21 @@ _PyLong_FromSTwoDigits(stwodigits x)
return _PyLong_FromLarge(x);
}

static void
int_dealloc(PyLongObject *op)
{
#if WITH_FREELISTS
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move the #if WITH_FREELISTS into PyInterpreterState_FreelistFree to keep it localized?

Personally, I think we should just remove WITH_FREELISTS, but that needs a wider discussion.

if (PyLong_CheckExact(op) && IS_MEDIUM_VALUE(op)) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyInterpreterState_FreelistFree(interp, (PyObject*)op, sizeof(PyLongObject));
}
else
#endif
{
Py_TYPE(op)->tp_free((PyObject *)op);
}
}

int
_PyLong_AssignValue(PyObject **target, Py_ssize_t value)
{
Expand Down Expand Up @@ -6289,7 +6319,7 @@ PyTypeObject PyLong_Type = {
"int", /* tp_name */
offsetof(PyLongObject, long_value.ob_digit), /* tp_basicsize */
sizeof(digit), /* tp_itemsize */
0, /* tp_dealloc */
(destructor)int_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
Expand Down
64 changes: 64 additions & 0 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,70 @@ PyObject_Free(void *ptr)
# define LIKELY(value) (value)
#endif


#if WITH_FREELISTS
void *
_PyFreeList_HalfFillAndAllocate(_PyFreeList *list)
{
assert(list->ptr == NULL);
if (list->capacity < 4) {
return PyObject_Malloc(list->size);
}
uint32_t i = 0;
for (; i < list->space>>1; i++) {
void* ptr = PyObject_Malloc(list->size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for future PR: we should add the bulk allocate/free capability to the allocator.

if (ptr == NULL) {
break;
}
*((void**)ptr) = list->ptr;
list->ptr = ptr;
}
if (i == 0) {
return NULL;
}
list->space -= (i-1);
void *result = list->ptr;
list->ptr = *((void **)result);
return result;
}

void
_PyFreeList_Clear(_PyFreeList *list)
{
int space = 0;
void *head = list->ptr;
while (head) {
void *next = *((void**)head);
PyObject_Free(head);
head = next;
space++;
}
list->ptr = NULL;
list->space += space;
}

void
_PyFreeList_FreeToFull(_PyFreeList *list, void *ptr)
{
assert(list->space == 0);
if (list->ptr == NULL) {
PyObject_Free(ptr);
return;
}
int space = 0;
void *head = list->ptr;
while (head) {
void *next = *((void**)head);
PyObject_Free(head);
head = next;
space++;
}
list->ptr = ptr;
*((void **)ptr) = NULL;
list->space = space-1;
}
#endif /* WITH_FREELISTS */

#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
Expand Down
12 changes: 12 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,12 @@ PyInterpreterState_New(void)

init_interpreter(interp, runtime, id, old_head, pending_lock);

#if WITH_FREELISTS
for (int i=0; i < INTERP_NUM_FREELISTS; i++) {
_PyFreeList_Init(&interp->freelists[i], 4 + 2*i, SMALL_OBJECT_FREELIST_SIZE);
}
#endif

HEAD_UNLOCK(runtime);
return interp;

Expand Down Expand Up @@ -681,6 +687,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
}
HEAD_UNLOCK(runtime);

#if WITH_FREELISTS
for (int i=0; i < INTERP_NUM_FREELISTS; i++) {
_PyFreeList_Clear(&interp->freelists[i]);
}
#endif

Py_CLEAR(interp->audit_hooks);

PyConfig_Clear(&interp->config);
Expand Down
0