8000 GH-132554: Specialize `GET_ITER` and `FOR_ITER` for `range` by markshannon · Pull Request #135063 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-132554: Specialize GET_ITER and FOR_ITER for range #135063

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4e34e27
Specialize GET_ITER
markshannon Apr 30, 2025
37757c6
Add news
markshannon Apr 30, 2025
658a422
Specialize FOR_ITER for ranges using tagged ints
markshannon Apr 30, 2025
8cf4e6e
Allow range starts other than zero
markshannon May 27, 2025
11117b4
Specialize GET_ITER for range
markshannon May 28, 2025
d6f033f
Fix assert
markshannon May 28, 2025
4679ccc
Fix GET_ITER stats
markshannon May 29, 2025
06c1680
Fix stats for FOR_ITER specialization
markshannon Jun 3, 2025
15cb250
Remove unused function
markshannon Jun 3, 2025
89d5d85
Correct comment
markshannon Jun 3, 2025
9f688e7
Fix merge glitch
markshannon Jun 3, 2025
7325b44
Fix tier 2 unspecialized iteration over ranges
markshannon Jun 3, 2025
e5f666d
Make functions inline for the JIT
markshannon Jun 3, 2025
8efce0a
Make PyStackRef_BoxInt inline for JIT
markshannon Jun 3, 2025
94ad6f9
Merge branch 'main' into specialize-for-iter-range
markshannon Jun 6, 2025
7ba5753
Fix PyStackRef_TaggedIntLessThan for Py_STACKREF_DEBUG
markshannon Jun 6, 2025
7553e10
Merge branch 'main' into specialize-for-iter-range
markshannon Jun 6, 2025
baf4722
Merge branch 'main' into specialize-for-iter-range
markshannon Jun 9, 2025
122a816
Cleanup _ITER_JUMP_TUPLE
markshannon Jun 10, 2025
9b65402
Merge branch 'main' into specialize-for-iter-range
markshannon Jun 13, 2025
4e44e85
Extend iteration by index to strings and bytes
markshannon Jun 16, 2025
c41d531
Streamline non-specialized FOR_ITER
markshannon Jun 16, 2025
47520ee
Convert INSTRUMENTED_FOR_ITER into macro.
markshannon Jun 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Specialize FOR_ITER for ranges using tagged ints
  • Loading branch information
markshannon committed Jun 3, 2025
commit 658a4225669e6fab85ffdecf3b19bfc847914672
9 changes: 9 additions & 0 deletions Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,15 @@ _PyLong_IsNonNegativeCompact(const PyLongObject* op) {
}


/* Return the value of a non-negative compact as a machine int */
static inline Py_ssize_t
_PyLong_GetNonNegativeCompactValue(const PyLongObject* op) {
assert(PyLong_Check(op));
assert (_PyLong_IsNonNegativeCompact(op));
return op->long_value.ob_digit[0];
}


static inline int
_PyLong_BothAreCompact(const PyLongObject* a, const PyLongObject* b) {
assert(PyLong_Check(a));
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Include/internal/pycore_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ typedef struct {
long len;
} _PyRangeIterObject;

// Does this range have start == 0, step == 1 and step in compact int range?
int _PyRange_IsSimpleCompact(PyObject *range);
Py_ssize_t _PyRange_GetStopIfCompact(PyObject *range);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_stackref.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref)

#define PyStackRef_IsDeferredOrTaggedInt(ref) (((ref).bits & Py_TAG_REFCNT) != 0)

extern _PyStackRef PyStackRef_BoxInt(_PyStackRef i);

#ifdef Py_GIL_DISABLED

#define Py_TAG_DEFERRED Py_TAG_REFCNT
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "pycore_long.h" // _Py_SmallInts
#include "pycore_object.h" // _PyObject_Init()
#include "pycore_runtime.h" // _PY_NSMALLPOSINTS
#include "pycore_stackref.h"
#include "pycore_structseq.h" // _PyStructSequence_FiniBuiltin()
#include "pycore_unicodeobject.h" // _PyUnicode_Equal()

Expand Down Expand Up @@ -6872,3 +6873,19 @@ PyLongWriter_Finish(PyLongWriter *writer)

return (PyObject*)obj;
}

// Tagged int support

_PyStackRef
PyStackRef_BoxInt(_PyStackRef i)
{
assert((i.bits & Py_INT_TAG) == Py_INT_TAG);
intptr_t val = (intptr_t)i.bits;
val = Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, val, 2);
PyObject *boxed = PyLong_FromSsize_t(val);
if (boxed == NULL) {
return PyStackRef_NULL;
}
return PyStackRef_FromPyObjectSteal(boxed);
}

20 changes: 20 additions & 0 deletions Objects/rangeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,26 @@ range_vectorcall(PyObject *rangetype, PyObject *const *args,
return range_from_array((PyTypeObject *)rangetype, args, nargs);
}

int
_PyRange_IsSimpleCompact(PyObject *range) {
assert(PyRange_Check(range));
rangeobject *r = (rangeobject*)range;
if (r->start == _PyLong_GetZero() && r->step == _PyLong_GetOne() &&
_PyLong_IsNonNegativeCompact((PyLongObject *)r->stop)
) {
return 1;
}
return 0;
}

Py_ssize_t
_PyRange_GetStopIfCompact(PyObject *range) {
assert(PyRange_Check(range));
rangeobject *r = (rangeobject*)range;
assert(_PyLong_IsNonNegativeCompact((PyLongObject *)r->stop));
return _PyLong_GetNonNegativeCompactValue((PyLongObject *)r->stop);
}

PyDoc_STRVAR(range_doc,
"range(stop) -> range object\n\
range(start, stop[, step]) -> range object\n\
Expand Down
114 changes: 67 additions & 47 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3078,11 +3078,20 @@ dummy_func(
index_or_null = PyStackRef_TagInt(0);
}
else {
PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable));
PyStackRef_CLOSE(iterable);
ERROR_IF(iter_o == NULL);
iter = PyStackRef_FromPyObjectSteal(iter_o);
index_or_null = PyStackRef_NULL;
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iterable);
if (tp == &PyRange_Type && _PyRange_IsSimpleCompact(iter_o)) {
Py_ssize_t stop = _PyRange_GetStopIfCompact(iter_o);
PyStackRef_CLOSE(iterable);
iter = PyStackRef_TagInt(stop);
index_or_null = PyStackRef_TagInt(0);
}
else {
iter_o = PyObject_GetIter(iter_o);
PyStackRef_CLOSE(iterable);
ERROR_IF(iter_o == NULL);
iter = PyStackRef_FromPyObjectSteal(iter_o);
index_or_null = PyStackRef_NULL;
}
}
}

Expand Down Expand Up @@ -3160,16 +3169,32 @@ dummy_func(

replaced op(_FOR_ITER, (iter, null_or_index -- iter, null_or_index, next)) {
/* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (PyStackRef_IsTaggedInt(null_or_index)) {
next = _PyForIter_NextWithIndex(iter_o, null_or_index);
if (PyStackRef_IsNull(next)) {
JUMPBY(oparg + 1);
DISPATCH();
if (PyStackRef_IsTaggedInt(iter)) {
if (PyStackRef_Is(iter, null_or_index)) {
null_or_index = PyStackRef_TagInt(-1);
JUMPBY(oparg + 1);
DISPATCH();

}
next = PyStackRef_BoxInt(null_or_index);
if (PyStackRef_IsNull(next)) {
ERROR_NO_POP();
}
null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index);
}
else {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
next = _PyForIter_NextWithIndex(iter_o, null_or_index);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _PyForIter_NextWithIndex handles the exact lists and exact tuples.

In this PR we have the code to handle range iteration by pushing the index and limit to the stack. Could we simplify _PyForIter_NextWithIndex to only deal with lists and for tuples push the index and length of the tuple to the stack (e.g. use the same approach as range)?

(if this is possible maybe in a followup PR)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you proposing pushing a third value to the stack during iteration?
I doubt that would be worth it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Might not be worth it indeed, but I will check once this pr has settled.

if (PyStackRef_IsNull(next)) {
JUMPBY(oparg + 1);
DISPATCH();
}
null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index);
}
null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index);
}
else {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o);
if (next_o == NULL) {
if (_PyErr_Occurred(tstate)) {
Expand Down Expand Up @@ -3219,10 +3244,25 @@ dummy_func(
inst(INSTRUMENTED_FOR_ITER, (unused/1, iter, null_or_index -- iter, null_or_index, next)) {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (PyStackRef_IsTaggedInt(null_or_index)) {
next = _PyForIter_NextWithIndex(iter_o, null_or_index);
if (PyStackRef_IsNull(next)) {
JUMPBY(oparg + 1);
DISPATCH();
if (PyStackRef_IsTaggedInt(iter)) {
if (PyStackRef_Is(iter, null_or_index)) {
null_or_index = PyStackRef_TagInt(-1);
JUMPBY(oparg + 1);
DISPATCH();

}
next = PyStackRef_BoxInt(null_or_index);
if (PyStackRef_IsNull(next)) {
ERROR_NO_POP();
}
null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index);
}
else {
next = _PyForIter_NextWithIndex(iter_o, null_or_index);
if (PyStackRef_IsNull(next)) {
JUMPBY(oparg + 1);
DISPATCH();
}
}
null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index);
INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT);
Expand Down Expand Up @@ -3254,10 +3294,10 @@ dummy_func(


op(_ITER_CHECK_LIST, (iter, null_or_index -- iter, null_or_index)) {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
EXIT_IF(Py_TYPE(iter_o) != &PyList_Type);
EXIT_IF(PyStackRef_TYPE(iter) != &PyList_Type);
assert(PyStackRef_IsTaggedInt(null_or_index));
#ifdef Py_GIL_DISABLED
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
EXIT_IF(!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o));
#endif
}
Expand Down Expand Up @@ -3339,8 +3379,7 @@ dummy_func(
_ITER_NEXT_LIST;

op(_ITER_CHECK_TUPLE, (iter, null_or_index -- iter, null_or_index)) {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
EXIT_IF(Py_TYPE(iter_o) != &PyTuple_Type);
EXIT_IF(PyStackRef_TYPE(iter) != &PyTuple_Type);
assert(PyStackRef_IsTaggedInt(null_or_index));
}

Expand Down Expand Up @@ -3380,21 +3419,11 @@ dummy_func(
_ITER_NEXT_TUPLE;

op(_ITER_CHECK_RANGE, (iter, null_or_index -- iter, null_or_index)) {
_PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
EXIT_IF(Py_TYPE(r) != &PyRangeIter_Type);
#ifdef Py_GIL_DISABLED
EXIT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)r));
#endif
EXIT_IF(!PyStackRef_IsTaggedInt(iter));
}

replaced op(_ITER_JUMP_RANGE, (iter, null_or_index -- iter, null_or_index)) {
_PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
assert(Py_TYPE(r) == &PyRangeIter_Type);
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
#endif
STAT_INC(FOR_ITER, hit);
if (r->len <= 0) {
if (PyStackRef_Is(iter, null_or_index)) {
// Jump over END_FOR instruction.
JUMPBY(oparg + 1);
DISPATCH();
Expand All @@ -3403,24 +3432,15 @@ dummy_func(

// Only used by Tier 2
op(_GUARD_NOT_EXHAUSTED_RANGE, (iter, null_or_index -- iter, null_or_index)) {
_PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
assert(Py_TYPE(r) == &PyRangeIter_Type);
EXIT_IF(r->len <= 0);
EXIT_IF(PyStackRef_Is(iter, null_or_index));
}

op(_ITER_NEXT_RANGE, (iter, null_or_index -- iter, null_or_index, next)) {
_PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
assert(Py_TYPE(r) == &PyRangeIter_Type);
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
#endif
assert(r->len > 0);
long value = r->start;
r->start = value + r->step;
r->len--;
PyObject *res = PyLong_FromLong(value);
ERROR_IF(res == NULL);
next = PyStackRef_FromPyObjectSteal(res);
next = PyStackRef_BoxInt(null_or_index);
if (PyStackRef_IsNull(next)) {
ERROR_NO_POP();
}
null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index);
}

macro(FOR_ITER_RANGE) =
Expand All @@ -3430,8 +3450,8 @@ dummy_func(
_ITER_NEXT_RANGE;

op(_FOR_ITER_GEN_FRAME, (iter, null -- iter, null, gen_frame: _PyInterpreterFrame*)) {
DEOPT_IF(PyStackRef_TYPE(iter) != &PyGen_Type);
PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter);
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type);
#ifdef Py_GIL_DISABLED
// Since generators can't be used by multiple threads anyway we
// don't need to deopt here, but this lets us work on making
Expand Down
Loading
0