8000 GH-100240: Generic freelist, applied to ints by iritkatriel · Pull Request #101453 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-100240: Generic freelist, applied to ints #101453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ extern "C" {
#include "pycore_dict_state.h" // struct _Py_dict_state
#include "pycore_exceptions.h" // struct _Py_exc_state
#include "pycore_floatobject.h" // struct _Py_float_state
#include "pycore_pymem.h" // free lists
#include "pycore_function.h" // FUNC_MAX_WATCHERS
#include "pycore_genobject.h" // struct _Py_async_gen_state
#include "pycore_gc.h" // struct _gc_runtime_state
Expand Down Expand Up @@ -50,6 +51,9 @@ struct _Py_long_state {

/* interpreter state */

#define SMALL_OBJECT_FREELIST_SIZE 1024
#define INTERP_NUM_FREELISTS 30

/* PyInterpreterState holds the global state for one of the runtime's
interpreters. Typically the initial (main) interpreter is the only one.

Expand Down Expand Up @@ -178,6 +182,7 @@ struct _is {
struct _Py_context_state context;
struct _Py_exc_state exc_state;

_PyFreeList freelists[INTERP_NUM_FREELISTS];
struct ast_state ast;
struct types_state types;
struct callable_cache callable_cache;
Expand Down Expand Up @@ -230,6 +235,27 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *);
PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *);
PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *);

#define FREELIST_QUANTUM (2*sizeof(void*))
#define SIZE_TO_FREELIST_INDEX(size) ((size-4)/FREELIST_QUANTUM)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this formula?

You want to allocate all sizes from (n-1)*FREELIST_QUANTUM + 1 to n*FREELIST_QUANTUM (inclusive) from the same freelist.
I would expect the formula to be (size + FREELIST_QUANTUM -1)/FREELIST_QUANTUM, or (size-1)/FREELIST_QUANTUM.

And because C division rounds towards 0, not -infinity, you want to use >>LOG_BASE_2_OF_FREELIST_QUANTUM, not /FREELIST_QUANTUM.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't changed this yet. I'm trying to get it to work with just ints.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There already are some macros in pycore_obmalloc.h that seem to do something like this:

/* 
 * Alignment of addresses returned to the user. 8-bytes alignment works
 * on most current architectures (with 32-bit or 64-bit address buses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */

#if SIZEOF_VOID_P > 4
#define ALIGNMENT              16               /* must be 2^N */
#define ALIGNMENT_SHIFT         4
#else
#define ALIGNMENT               8               /* must be 2^N */
#define ALIGNMENT_SHIFT         3
#endif
   
/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((pymem_uint)(I) + 1) << ALIGNMENT_SHIFT)

#define FREELIST_INDEX_TO_SIZE(idx) (FREELIST_QUANTUM*(idx) + 4)

static inline PyObject*
_PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) {
int index = SIZE_TO_FREELIST_INDEX(size);
assert(index >= 0 && index < INTERP_NUM_FREELISTS);
return _PyFreeList_Alloc(&interp->freelists[index]);
}

static inline void
_PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, Py_ssize_t size) {
/* todo: assert the size is correct? */
int index = SIZE_TO_FREELIST_INDEX(size);
assert(index >= 0 && index < INTERP_NUM_FREELISTS);
_PyFreeList_Free(&interp->freelists[index], op);
}



#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ _PyLong_IsPositiveSingleDigit(PyObject* sub) {
return ((size_t)signed_size) <= 1;
}

void _PyLong_Free(PyLongObject *op);

#ifdef __cplusplus
}
#endif
Expand Down
67 changes: 67 additions & 0 deletions Include/internal/pycore_pymem.h
5D32
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,73 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName(
PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator);


#if WITH_FREELISTS
/* Free lists.
*
* Free lists have a pointer to their first entry and
* the amount of space available allowing fast checks
* for emptiness and fullness.
* When empty they are half filled and when full they are
* completely emptied. This helps the underlying allocator
* avoid fragmentation and helps performance.
*/

typedef struct _freelist {
void *ptr;
uint32_t space;
uint16_t size;
uint16_t capacity;
} _PyFreeList;

extern void *_PyFreeList_HalfFillAndAllocate(_PyFreeList *list);
extern void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr);


static inline void *
_PyFreeList_Alloc(_PyFreeList *list) {
if (list->ptr != NULL) {
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.from_generic_freelist++;
#endif
void *result = list->ptr;
list->ptr = *((void **)result);
list->space++;
return result;
}
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.generic_freelist_empty++;
#endif
return _PyFreeList_HalfFillAndAllocate(list);
}

static inline void
_PyFreeList_Free(_PyFreeList *list, void *ptr) {
if (list->space) {
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.to_generic_freelist++;
#endif
*((void **)ptr) = list->ptr;
list->ptr = ptr;
list->space--;
return;
}
#ifdef Py_STATS
if (_py_stats) _py_stats->object_stats.generic_freelist_full++;
#endif
_PyFreeList_FreeToFull(list, ptr);
}

static inline void
_PyFreeList_Init(_PyFreeList *list, int size, int capacity)
{
list->ptr = NULL;
list->space = list->capacity = capacity;
list->size = size;
}

extern void _PyFreeList_Clear(_PyFreeList *list);
#endif /* WITH_FREELISTS */

#ifdef __cplusplus
}
#endif
AE96 Expand Down
4 changes: 4 additions & 0 deletions Include/pystats.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ typedef struct _object_stats {
uint64_t frees;
uint64_t to_freelist;
uint64_t from_freelist;
uint64_t to_generic_freelist;
uint64_t from_generic_freelist;
uint64_t generic_freelist_empty;
uint64_t generic_freelist_full;
uint64_t new_values;
uint64_t dict_materialized_on_request;
uint64_t dict_materialized_new_key;
Expand Down
50 changes: 34 additions & 16 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "pycore_bitutils.h" // _Py_popcount32()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_long.h" // _Py_SmallInts
#include "pycore_pymem.h" // Free lists
#include "pycore_object.h" // _PyObject_InitVar()
#include "pycore_pystate.h" // _Py_IsMainInterpreter()
#include "pycore_runtime.h" // _PY_NSMALLPOSINTS
Expand Down Expand Up @@ -46,7 +47,7 @@ static inline void
_Py_DECREF_INT(PyLongObject *op)
{
assert(PyLong_CheckExact(op));
_Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)_PyLong_Free);
}

static inline int
Expand Down Expand Up @@ -152,16 +153,20 @@ _PyLong_New(Py_ssize_t size)
"too many digits in integer");
return NULL;
}
/* Fast operations for single digit integers (including zero)
* assume that there is always at least one digit present. */
Py_ssize_t ndigits = size ? size : 1;
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof(PyVarObject) instead of the offsetof, but this risks being
incorrect in the presence of padding between the PyVarObject header
and the digits. */
result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) +
ndigits*sizeof(digit));
assert(size >= 0);
if (size <= 1) {
PyInterpreterState *interp = _PyInterpreterState_GET();
result = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject));
}
else {
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof(PyVarObject) instead of the offsetof, but this risks being
incorrect in the presence of padding between the PyVarObject header
and the digits. */
result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) +
size*sizeof(digit));
}
if (!result) {
PyErr_NoMemory();
return NULL;
Expand Down Expand Up @@ -201,11 +206,11 @@ _PyLong_FromMedium(sdigit x)
{
assert(!IS_SMALL_INT(x));
assert(is_medium_int(x));
/* We could use a freelist here */
PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject));
PyInterpreterState *interp = _PyInterpreterState_GET();
PyLongObject *v = (PyLongObject *)_PyInterpreterState_FreelistAlloc(
interp, sizeof(PyLongObject));
if (v == NULL) {
PyErr_NoMemory();
return NULL;
return PyErr_NoMemory();
}
Py_ssize_t sign = x < 0 ? -1: 1;
digit abs_x = x < 0 ? -x : x;
Expand Down Expand Up @@ -267,6 +272,19 @@ _PyLong_FromSTwoDigits(stwodigits x)
return _PyLong_FromLarge(x);
}

void
_PyLong_Free(PyLongObject *op)
{
if (PyLong_CheckExact(op) && IS_MEDIUM_VALUE(op)) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyInterpreterState_FreelistFree(interp, (PyObject*)op, sizeof(PyLongObject));
}
else
{
Py_TYPE(op)->tp_free((PyObject *)op);
}
}

int
_PyLong_AssignValue(PyObject **target, Py_ssize_t value)
{
Expand Down Expand Up @@ -6289,7 +6307,7 @@ PyTypeObject PyLong_Type = {
"int", /* tp_name */
offsetof(PyLongObject, long_value.ob_digit), /* tp_basicsize */
sizeof(digit), /* tp_itemsize */
0, /* tp_dealloc */
(destructor)_PyLong_Free, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
Expand Down
54 changes: 54 additions & 0 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,60 @@ PyObject_Free(void *ptr)
# define LIKELY(value) (value)
#endif


#if WITH_FREELISTS
void *
_PyFreeList_HalfFillAndAllocate(_PyFreeList *list)
{
assert(list->ptr == NULL);
if (list->capacity < 4) {
return PyObject_Malloc(list->size);
}
uint32_t i = 0;
for (; i < list->space>>1; i++) {
void* ptr = PyObject_Malloc(list->size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for future PR: we should add the bulk allocate/free capability to the allocator.

if (ptr == NULL) {
break;
}
*((void**)ptr) = list->ptr;
list->ptr = ptr;
}
if (i == 0) {
return NULL;
}
void *result = list->ptr;
list->ptr = *((void **)result);
list->space -= (i-1);
return result;
}

void
_PyFreeList_Clear(_PyFreeList *list)
{
int space = 0;
void *head = list->ptr;
while (head) {
void *next = *((void**)head);
PyObject_Free(head);
head = next;
space++;
}
list->ptr = NULL;
list->space += space;
}

void
_PyFreeList_FreeToFull(_PyFreeList *list, void *ptr)
{
assert(list->space == 0);
PyObject_Free(ptr);
if (list->ptr == NULL) {
return;
}
_PyFreeList_Clear(list);
}
#endif /* WITH_FREELISTS */

#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
Expand Down
22 changes: 11 additions & 11 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ dummy_func(
DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP);
STAT_INC(BINARY_OP, hit);
prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free);
_Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free);
ERROR_IF(prod == NULL, error);
}

Expand All @@ -207,8 +207,8 @@ dummy_func(
DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP);
STAT_INC(BINARY_OP, hit);
sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free);
_Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free);
ERROR_IF(sub == NULL, error);
}

Expand Down Expand Up @@ -290,8 +290,8 @@ dummy_func(
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP);
STAT_INC(BINARY_OP, hit);
sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free);
_Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free);
ERROR_IF(sum == NULL, error);
}

Expand Down Expand Up @@ -364,7 +364,7 @@ dummy_func(
res = PyList_GET_ITEM(list, index);
assert(res != NULL);
Py_INCREF(res);
_Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free);
Py_DECREF(list);
}

Expand All @@ -382,7 +382,7 @@ dummy_func(
res = PyTuple_GET_ITEM(tuple, index);
assert(res != NULL);
Py_INCREF(res);
_Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free);
Py_DECREF(tuple);
}

Expand Down Expand Up @@ -478,7 +478,7 @@ dummy_func(
PyList_SET_ITEM(list, index, value);
assert(old_value != NULL);
Py_DECREF(old_value);
7E38 _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free);
Py_DECREF(list);
}

Expand Down Expand Up @@ -1838,8 +1838,8 @@ dummy_func(
Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->long_value.ob_digit[0];
// 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg
int sign_ish = COMPARISON_BIT(ileft, iright);
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free);
if (sign_ish & oparg) {
int offset = _Py_OPARG(next_instr[1]);
JUMPBY(offset);
Expand Down
Loading
0