8000 bpo-47177: Replace `f_lasti` with `prev_instr` by brandtbucher · Pull Request #32208 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-47177: Replace f_lasti with prev_instr #32208

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 7, 2022
Merged
Prev Previous commit
Next Next commit
Use next_instr instead of last_instr
  • Loading branch information
brandtbucher committed Mar 25, 2022
commit 840e14cb4fcf4356b923e2ad17fd8fb24f15cc75
6 changes: 3 additions & 3 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ typedef struct _PyInterpreterFrame {
PyCodeObject *f_code; /* Strong reference */
PyFrameObject *frame_obj; /* Strong reference, may be NULL */
struct _PyInterpreterFrame *previous;
_Py_CODEUNIT *last_instr;
_Py_CODEUNIT *next_instr;
int stacktop; /* Offset of TOS from localsplus */
bool is_entry; // Whether this is the "root" frame for the current _PyCFrame.
char owner;
PyObject *localsplus[1];
} _PyInterpreterFrame;

#define _PyInterpreterFrame_LASTI(IF) \
((int)((IF)->last_instr - _PyCode_CODE((IF)->f_code)))
((int)((IF)->next_instr - _PyCode_CODE((IF)->f_code)) - 1)

static inline PyObject **_PyFrame_Stackbase(_PyInterpreterFrame *f) {
return f->localsplus + f->f_code->co_nlocalsplus;
Expand Down Expand Up @@ -94,7 +94,7 @@ _PyFrame_InitializeSpecials(
frame->f_locals = Py_XNewRef(locals);
frame->stacktop = nlocalsplus;
frame->frame_obj = NULL;
frame->last_instr = _PyCode_CODE(frame->f_code) - 1;
frame->next_instr = _PyCode_CODE(frame->f_code);
frame->is_entry = false;
frame->owner = FRAME_OWNED_BY_THREAD;
< 8000 /td> }
Expand Down
6 changes: 3 additions & 3 deletions Objects/frameobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ _PyFrame_GetState(PyFrameObject *frame)
if (_PyInterpreterFrame_LASTI(frame->f_frame) < 0) {
return FRAME_CREATED;
}
switch(_PyOpcode_Deopt[_Py_OPCODE(*frame->f_frame->last_instr)]) {
switch(_PyOpcode_Deopt[_Py_OPCODE(frame->f_frame->next_instr[-1])]) {
case COPY_FREE_VARS:
case MAKE_CELL:
case RETURN_GENERATOR:
Expand Down Expand Up @@ -603,7 +603,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno, void *Py_UNUSED(ignore
}
/* Finally set the new lasti and return OK. */
f->f_lineno = 0;
f->f_frame->last_instr = _PyCode_CODE(f->f_frame->f_code) + best_addr;
f->f_frame->next_instr = _PyCode_CODE(f->f_frame->f_code) + best_addr + 1;
return 0;
}

Expand Down Expand Up @@ -930,7 +930,7 @@ _PyFrame_FastToLocalsWithError(_PyInterpreterFrame *frame) {
frame->localsplus[offset + i] = o;
}
// COPY_FREE_VARS doesn't have inline CACHEs, either:
frame->last_instr = _PyCode_CODE(frame->f_code);
frame->next_instr = _PyCode_CODE(frame->f_code) + 1;
}
for (int i = 0; i < co->co_nlocalsplus; i++) {
_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
Expand Down
8 changes: 4 additions & 4 deletions Objects/genobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -489,10 +489,10 @@ _gen_throw(PyGenObject *gen, int close_on_genexit,
/* Termination repetition of SEND loop */
assert(_PyInterpreterFrame_LASTI(frame) >= 0);
/* Backup to SEND */
frame->last_instr--;
assert(_Py_OPCODE(*frame->last_instr) == SEND);
int jump = _Py_OPARG(*frame->last_instr);
frame->last_instr += jump;
frame->next_instr--;
assert(_Py_OPCODE(frame->next_instr[-1]) == SEND);
int jump = _Py_OPARG(frame->next_instr[-1]);
frame->next_instr += jump;
if (_PyGen_FetchStopIterationValue(&val) == 0) {
ret = gen_send(gen, val);
Py_DECREF(val);
Expand Down
20 changes: 10 additions & 10 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1247,13 +1247,13 @@ eval_frame_handle_pending(PyThreadState *tstate)
#ifdef Py_STATS
#define INSTRUCTION_START(op) \
do { \
frame->last_instr = next_instr++; \
frame->next_instr = ++next_instr; \
OPCODE_EXE_INC(op); \
_py_stats.opcode_stats[lastopcode].pair_count[op]++; \
lastopcode = op; \
} while (0)
#else
#define INSTRUCTION_START(op) (frame->last_instr = next_instr++)
#define INSTRUCTION_START(op) (frame->next_instr = ++next_instr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's something to consider.

Rather that storing next_instr, we could store last_instr (strictly, next_instr_less_one) as it would break the dependency of the store on the addition.
frame->next_instr_less_one = next_instr; next_instr++
The compiler can then merge the next_instr++ with any subsequent JUMPBY(INLINE_CACHE_ENTRIES_...)

It would also simplify the changes as you wouldn't need to change all the offsets by one.

Thoughts?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. I'll try that out in another branch.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The performance numbers show no difference between that branch and this one. The other branch does result in a net reduction of "adjust by 1" moves, so I think I slightly prefer it.

Thoughts?

Copy link
Member
@markshannon markshannon Apr 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer the other version. It should be quicker, and feels less intrusive. I'm not surprised that there is no measurable difference in performance, I would expect it to be very slight.

#endif

#if USE_COMPUTED_GOTOS
Expand Down Expand Up @@ -1644,7 +1644,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
} \
assert(_PyInterpreterFrame_LASTI(frame) >= -1); \
/* Jump back to the last instruction executed... */ \
next_instr = frame->last_instr + 1; \
next_instr = frame->next_instr; \
stack_pointer = _PyFrame_GetStackPointer(frame); \
/* Set stackdepth to -1. \
Update when returning or calling trace function. \
Expand Down Expand Up @@ -2195,7 +2195,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
new_frame->localsplus[i] = NULL;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->last_instr += INLINE_CACHE_ENTRIES_BINARY_SUBSCR;
frame->next_instr += INLINE_CACHE_ENTRIES_BINARY_SUBSCR;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
Expand Down Expand Up @@ -2599,7 +2599,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
if (oparg) {
PyObject *lasti = PEEK(oparg + 1);
if (PyLong_Check(lasti)) {
frame->last_instr = first_instr + PyLong_AsLong(lasti);
frame->next_instr = first_instr + PyLong_AsLong(lasti) + 1;
assert(!_PyErr_Occurred(tstate));
}
else {
Expand Down Expand Up @@ -4611,7 +4611,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
goto error;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->last_instr += INLINE_CACHE_ENTRIES_CALL;
frame->next_instr += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
cframe.current_frame = frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
Expand Down Expand Up @@ -4716,7 +4716,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
STACK_SHRINK(2-is_meth);
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->last_instr += INLINE_CACHE_ENTRIES_CALL;
frame->next_instr += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
goto start_frame;
Expand Down Expand Up @@ -4756,7 +4756,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
STACK_SHRINK(2-is_meth);
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->last_instr += INLINE_CACHE_ENTRIES_CALL;
frame->next_instr += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
goto start_frame;
Expand Down Expand Up @@ -5414,7 +5414,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
{
if (tstate->tracing == 0) {
int instr_prev = _PyInterpreterFrame_LASTI(frame);
frame->last_instr = next_instr;
frame->next_instr = next_instr + 1;
TRACING_NEXTOPARG();
switch(opcode) {
case COPY_FREE_VARS:
Expand Down Expand Up @@ -6740,7 +6740,7 @@ maybe_call_line_trace(Py_tracefunc func, PyObject *obj,
(_PyInterpreterFrame_LASTI(frame) < instr_prev &&
// SEND has no quickened forms, so no need to use _PyOpcode_Deopt
// here:
_Py_OPCODE(*frame->last_instr) != SEND);
_Py_OPCODE(frame->next_instr[-1]) != SEND);
if (trace) {
result = call_trace(func, obj, tstate, frame, PyTrace_LINE, Py_None);
< A20F /td> }
Expand Down
4 changes: 2 additions & 2 deletions Tools/gdb/libpython.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,9 +1016,9 @@ def _f_nlocalsplus(self):

def _f_lasti(self):
_Py_CODEUNIT = gdb.lookup_type("_Py_CODEUNIT")
last_instr = self._gdbval["last_instr"]
next_instr = self._gdbval["next_instr"]
first_instr = self._f_code().field("co_code_adaptive").cast(_Py_CODEUNIT.pointer())
return int(last_instr - first_instr)
return int(next_instr - first_instr - 1)

def is_entry(self):
return self._f_special("is_entry", bool)
Expand Down
0