From 29db898e02a3f5b7f85a41754be77b583ce1dcd5 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 14 Jan 2024 07:20:22 +0800 Subject: [PATCH 001/111] abstract interp --- Tools/cases_generator/stack.py | 2 +- .../tier2_abstract_generator.py | 397 ++++++++++++++++++ 2 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 Tools/cases_generator/tier2_abstract_generator.py diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 6633950aada002..d2d7e1dcb39392 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from cwriter import CWriter -UNUSED = {"unused"} +UNUSED = {"unused", "__unused_"} def maybe_parenthesize(sym: str) -> str: diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py new file mode 100644 index 00000000000000..d2fac694a7fd05 --- /dev/null +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -0,0 +1,397 @@ +"""Generate the cases for the tier 2 abstract interpreter. +Reads the instruction definitions from bytecodes.c. 
+Writes the cases to abstract_interp_cases.c.h, which is #included in optimizer_analysis.c +""" + +import argparse +import os.path +import sys +import dataclasses + +from analyzer import ( + Analysis, + Instruction, + Uop, + Part, + analyze_files, + Skip, + StackItem, + analysis_error, +) +from generators_common import ( + DEFAULT_INPUT, + ROOT, + write_header, + emit_tokens, + emit_to, + REPLACEMENT_FUNCTIONS, +) +from tier2_generator import tier2_replace_error +from cwriter import CWriter +from typing import TextIO, Iterator +from lexer import Token +from stack import StackOffset, Stack, SizeMismatch, UNUSED + +DEFAULT_OUTPUT = ROOT / "Python/abstract_interp_cases.c.h" + +SPECIALLY_HANDLED_ABSTRACT_INSTR = { + "LOAD_FAST", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", + "POP_TOP", + "PUSH_NULL", + "END_SEND", + "SWAP", + + # Frame stuff + "_PUSH_FRAME", + "_POP_FRAME", + "_SHRINK_STACK", + + + # Shouldn't appear in abstract interpreter + "_LOAD_FAST_NO_INCREF", + "_LOAD_CONST_IMMEDIATE", + "_SWAP_AND_POP", + "_STORE_COMMON", + "_LOAD_COMMON", +} + +def declare_variables( + uop: Uop, + out: CWriter, + default_type: str = "_Py_UOpsSymbolicExpression *", + skip_inputs: bool = False, + skip_peeks: bool = False, +) -> None: + variables = set(UNUSED) + if not skip_inputs: + for var in reversed(uop.stack.inputs): + if skip_peeks and var.peek: + continue + if var.name not in variables: + type = var.type if var.type else default_type + if var.size > '1' and type == "PyObject **": + type = "_Py_UOpsSymbolicExpression **" + variables.add(var.name) + if var.condition: + out.emit(f"{type}{var.name} = NULL;\n") + else: + out.emit(f"{type}{var.name};\n") + for var in uop.stack.outputs: + if skip_peeks and var.peek: + continue + if var.name not in variables: + variables.add(var.name) + type = var.type if var.type else default_type + if var.condition: + out.emit(f"{type}{var.name} = NULL;\n") + else: + 
out.emit(f"{type}{var.name};\n") + + +def tier2_replace_deopt( + out: CWriter, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, +) -> None: + out.emit_at("if ", tkn) + out.emit(next(tkn_iter)) + emit_to(out, tkn_iter, "RPAREN") + next(tkn_iter) # Semi colon + out.emit(") goto error;\n") + + +TIER2_REPLACEMENT_FUNCTIONS = REPLACEMENT_FUNCTIONS.copy() +TIER2_REPLACEMENT_FUNCTIONS["ERROR_IF"] = tier2_replace_error +TIER2_REPLACEMENT_FUNCTIONS["DEOPT_IF"] = tier2_replace_deopt + + +def _write_body_abstract_interp_impure_uop( + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack +) -> None: + + # Simply make all outputs effects unknown + + for var in mangled_uop.stack.outputs: + if var.name in UNUSED or var.peek: + continue + + if var.size == '1': + out.emit(f"{var.name} = sym_init_unknown(ctx);\n") + out.emit(f"if({var.name} == NULL) goto error;\n") + if var.name in ("null", "__null_"): + out.emit(f"sym_set_type({var.name}, NULL_TYPE, 0);\n") + else: + out.emit(f"for (int case_gen_i = 0; case_gen_i < {var.size}; case_gen_i++) {{\n") + out.emit(f"{var.name}[case_gen_i] = sym_init_unknown(ctx);\n") + out.emit(f"if({var.name}[case_gen_i] == NULL) goto error;\n") + out.emit("}\n") + + +def mangle_uop_names(uop: Uop) -> Uop: + uop = dataclasses.replace(uop) + new_stack = dataclasses.replace(uop.stack) + new_stack.inputs = [dataclasses.replace(var, name=f"__{var.name}_") for var in uop.stack.inputs] + new_stack.outputs = [dataclasses.replace(var, name=f"__{var.name}_") for var in uop.stack.outputs] + uop.stack = new_stack + return uop + +# Returns a tuple of a pointer to an array of subexpressions, the length of said array +# and a string containing the join of all other subexpressions obtained from stack input. 
+# This grabs variadic inputs that depend on things like oparg or cache +def get_subexpressions(input_vars: list[StackItem]) -> tuple[str, int, str]: + arr_var = [(var.name, var) for var in input_vars if var.size > '1'] + assert len(arr_var) <= 1, "Can have at most one array input from oparg/cache" + arr_var_name = arr_var[0][0] if len(arr_var) == 1 else None + arr_var_size = (arr_var[0][1].size or 0) if arr_var_name is not None else 0 + if arr_var_name is not None: + input_vars.remove(arr_var[0][1]) + var = ", ".join([v.name for v in input_vars]) + if var: + var = ", " + var + return arr_var_name, arr_var_size, var + +def new_sym( + constant: str | None, + arr_var_size: int | str | None, + arr_var_name: str | None, + subexpresion_count: int | str, + subexpressions: str +) -> str: + return ( + f"_Py_UOpsSymbolicExpression_New(" + f"ctx, *inst, {constant or 'NULL'}, " + f"{arr_var_size or 0}, {arr_var_name or 'NULL'}, " + f"{subexpresion_count} {subexpressions});" + ) + + +def _write_body_abstract_interp_pure_uop( + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack +) -> None: + + arr_var_name, arr_var_size, subexpressions = get_subexpressions(mangled_uop.stack.inputs) + + assert len(uop.stack.outputs) == 1, f"Currently we only support 1 stack output for pure ops: {uop}" + # uop is mandatory - we cannot const evaluate it + if uop.properties.mandatory: + out.emit(f"{mangled_uop.stack.outputs[0].name} = {new_sym(None, arr_var_size, arr_var_name, len(mangled_uop.stack.inputs), subexpressions)}") + return + + + # Constant prop only handles one output, and no variadic inputs. + # Perhaps in the future we can support these. 
+ if all(input.size == '1' for input in uop.stack.inputs): + # We can try a constant evaluation + out.emit("// Constant evaluation\n") + predicates = " && ".join([f"is_const({var.name})" for var in mangled_uop.stack.inputs if var.name not in UNUSED]) + + out.emit(f"if ({predicates or 0}){{\n") + declare_variables(uop, out, default_type="PyObject *") + for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): + out.emit(f"{var.name} = get_const({mangled_var.name});\n") + emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) + out.emit("\n") + maybe_const_val = new_sym(f'(PyObject *){uop.stack.outputs[0].name}', None, None, + len(mangled_uop.stack.inputs), subexpressions) + out.emit(f"{mangled_uop.stack.outputs[0].name} = {maybe_const_val}\n") + + out.emit("}\n") + out.emit("else {\n") + sym = new_sym(None, None, None, + len(mangled_uop.stack.inputs), subexpressions) + out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") + out.emit("}\n") + else: + sym = new_sym(None, None, None, + len(mangled_uop.stack.inputs), subexpressions) + out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") + + out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) goto error;\n") + + # Perform type propagation + if (typ := uop.stack.outputs[0].typeprop) is not None: + typname, aux = typ + aux = "0" if aux is None else aux + out.emit("// Type propagation\n") + out.emit( + f"sym_set_type({mangled_uop.stack.outputs[0].name}, {typname}, (uint32_t){aux});" + ) + +def _write_body_abstract_interp_guard_uop( + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack +) -> None: + # 1. Attempt to perform guard elimination + # 2. 
Type propagate for guard success + if uop.properties.mandatory: + out.emit("goto guard_required;") + return + + for cache in uop.caches: + if cache.name not in UNUSED: + if cache.size == 4: + type = cast = "PyObject *" + else: + type = f"uint{cache.size*16}_t " + cast = f"uint{cache.size*16}_t" + out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") + + out.emit("// Constant evaluation \n") + # TODO if we encode all type information of constants, then we shouldn't even need + # this part, and we can just do a type check. + predicates_str = " && ".join([f"is_const({var.name})" for var in mangled_uop.stack.inputs if var.name not in UNUSED]) + if predicates_str: + out.emit(f"if ({predicates_str}) {{\n") + declare_variables(uop, out, default_type="PyObject *") + for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): + if var.name in UNUSED: + continue + out.emit(f"{var.name} = get_const({mangled_var.name});\n") + emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) + out.emit("\n") + # Guard elimination - if we are successful, don't add it to the symexpr! + out.emit('DPRINTF(3, "const eliminated guard\\n");\n') + out.emit("break;\n") + out.emit("}\n") + + # Does the input specify typed inputs? 
+ if not any(output_var.typeprop for output_var in mangled_uop.stack.outputs): + return + # If the input types already match, eliminate the guard + # Read the cache information to check the auxiliary type information + predicates = [] + propagates = [] + + assert len(mangled_uop.stack.outputs) == len(mangled_uop.stack.inputs), "guards must have same number of args" + assert [output == input_ for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs)], \ + "guards must forward their stack values" + for output_var in mangled_uop.stack.outputs: + if output_var.name in UNUSED: + continue + if (typ := output_var.typeprop) is not None: + typname, aux = typ + aux = "0" if aux is None else aux + # Check that the input type information match (including auxiliary info) + predicates.append( + f"sym_matches_type((_Py_UOpsSymbolicExpression *){output_var.name}, {typname}, (uint32_t){aux})" + ) + # Propagate mode - set the types + propagates.append( + f"sym_set_type((_Py_UOpsSymbolicExpression *){output_var.name}, {typname}, (uint32_t){aux})" + ) + + out.emit("// Type guard elimination \n") + out.emit(f"if ({' && '.join(predicates)}){{\n") + out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') + out.emit("break;\n") + out.emit("}\n") + # Else we need the guard + out.emit("else {\n") + out.emit("// Type propagation \n") + for prop in propagates: + out.emit(f"{prop};\n") + out.emit("goto guard_required;\n") + out.emit("}\n") + + + + +def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) -> None: + try: + out.start_line() + for var in reversed(mangled_uop.stack.inputs): + is_impure = not mangled_uop.properties.pure and not mangled_uop.properties.guard + old_var_name = var.name + if is_impure: + var.name = "unused" + out.emit(stack.pop(var)) + var.name = old_var_name + if not mangled_uop.properties.stores_sp: + for i, var in enumerate(mangled_uop.stack.outputs): + out.emit(stack.push(var)) + # emit_tokens(out, uop, stack, 
None, TIER2_REPLACEMENT_FUNCTIONS) + if uop.properties.pure: + _write_body_abstract_interp_pure_uop(mangled_uop, uop, out, stack) + elif uop.properties.guard: + _write_body_abstract_interp_guard_uop(mangled_uop, uop, out, stack) + else: + _write_body_abstract_interp_impure_uop(mangled_uop, uop, out, stack) + except SizeMismatch as ex: + raise analysis_error(ex.args[0], uop.body[0]) + + +SKIPS = ("_EXTENDED_ARG",) + + +def generate_tier2_abstract( + filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool +) -> None: + write_header(__file__, filenames, outfile) + outfile.write( + """ +#ifdef TIER_ONE + #error "This file is for Tier 2 only" +#endif +#define TIER_TWO 2 +""" + ) + out = CWriter(outfile, 2, lines) + out.emit("\n") + for name, uop in analysis.uops.items(): + if name in SPECIALLY_HANDLED_ABSTRACT_INSTR: + continue + if uop.properties.tier_one_only: + continue + if uop.is_super(): + continue + if not uop.is_viable(): + out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n") + continue + out.emit(f"case {uop.name}: {{\n") + mangled_uop = mangle_uop_names(uop) + is_impure = not mangled_uop.properties.pure and not mangled_uop.properties.guard + declare_variables(mangled_uop, out, skip_inputs=is_impure, skip_peeks=is_impure) + stack = Stack() + write_abstract_uop(mangled_uop, uop, out, stack) + out.start_line() + if not uop.properties.always_exits: + stack.flush(out) + out.emit("break;\n") + out.start_line() + out.emit("}") + out.emit("\n\n") + outfile.write("#undef TIER_TWO\n") + + +arg_parser = argparse.ArgumentParser( + description="Generate the code for the tier 2 interpreter.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) + +arg_parser.add_argument( + "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT +) + +arg_parser.add_argument( + "-l", "--emit-line-directives", help="Emit #line directives", action="store_true" +) + +arg_parser.add_argument( + "input", nargs=argparse.REMAINDER, 
help="Instruction definition file(s)" +) + +if __name__ == "__main__": + args = arg_parser.parse_args() + if len(args.input) == 0: + args.input.append(DEFAULT_INPUT) + data = analyze_files(args.input) + with open(args.output, "w") as outfile: + generate_tier2_abstract(args.input, data, outfile, args.emit_line_directives) From c1332ccb0e6d0632d70485b55f1e1037eb5c6212 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 14 Jan 2024 07:29:07 +0800 Subject: [PATCH 002/111] the abstract interpreter --- .gitattributes | 1 + Makefile.pre.in | 6 +- Python/abstract_interp_cases.c.h | 1955 +++++++++++++++++ .../tier2_abstract_generator.py | 27 +- 4 files changed, 1975 insertions(+), 14 deletions(-) create mode 100644 Python/abstract_interp_cases.c.h diff --git a/.gitattributes b/.gitattributes index 2a48df079e1aeb..22afffb05abb20 100644 --- a/.gitattributes +++ b/.gitattributes @@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/executor_cases.c.h generated Python/generated_cases.c.h generated +Python/abstract_interp_cases.c.h generated Python/opcode_targets.h generated Python/stdlib_module_names.h generated Tools/peg_generator/pegen/grammar_parser.py generated diff --git a/Makefile.pre.in b/Makefile.pre.in index 289ab97666e902..6f19e0c513a2c9 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1603,6 +1603,8 @@ regen-cases: -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \ -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \ + -o $(srcdir)/Python/abstract_interp_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \ -o $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(srcdir)/Python/bytecodes.c 
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/uop_metadata_generator.py -o \ @@ -1614,6 +1616,7 @@ regen-cases: $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_uop_metadata.h $(srcdir)/Include/internal/pycore_uop_metadata.h.new $(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/abstract_interp_cases.c.h $(srcdir)/Python/abstract_interp_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h @@ -1635,7 +1638,8 @@ Python/optimizer.o: \ Python/optimizer_analysis.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ - $(srcdir)/Include/internal/pycore_optimizer.h + $(srcdir)/Include/internal/pycore_optimizer.h \ + $(srcdir)/Python/abstract_interp_cases.c.h Python/frozen.o: $(FROZEN_FILES_OUT) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h new file mode 100644 index 00000000000000..093099164d6dc4 --- /dev/null +++ b/Python/abstract_interp_cases.c.h @@ -0,0 +1,1955 @@ +// This file is generated by Tools/cases_generator/tier2_abstract_generator.py +// from: +// Python/bytecodes.c +// Do not edit! 
+ +#ifdef TIER_ONE + #error "This file is for Tier 2 only" +#endif +#define TIER_TWO 2 + + case _NOP: { + break; + } + + case _RESUME_CHECK: { + break; + } + + /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ + + case _UNARY_NEGATIVE: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _UNARY_NOT: { + _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicExpression *__res_; + __value_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__value_)){ + PyObject *value; + PyObject *res; + value = get_const(__value_); + assert(PyBool_Check(value)); + res = Py_IsFalse(value) ? Py_True : Py_False; + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 1 , __value_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 1 , __value_); + } + if (__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_BOOL: { + _Py_UOpsSymbolicExpression *__value_; + __value_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__value_)) { + PyObject *value; + value = get_const(__value_); + if (!PyBool_Check(value)) goto error; + STAT_INC(TO_BOOL, hit); + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _TO_BOOL_INT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_LIST: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_NONE: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + 
stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_STR: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_ALWAYS_TRUE: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _UNARY_INVERT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _GUARD_BOTH_INT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyLong_CheckExact(left)) goto error; + if (!PyLong_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYLONG_TYPE, (uint32_t)0)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYLONG_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYLONG_TYPE, (uint32_t)0); + goto guard_required; + } + break; + } + + case _BINARY_OP_MULTIPLY_INT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicExpression *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)){ + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = 
_PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYLONG_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_INT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicExpression *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)){ + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYLONG_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_SUBTRACT_INT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicExpression *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)){ + PyObject *right; + PyObject *left; + PyObject 
*res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYLONG_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_FLOAT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyFloat_CheckExact(left)) goto error; + if (!PyFloat_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYFLOAT_TYPE, (uint32_t)0)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYFLOAT_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYFLOAT_TYPE, (uint32_t)0); + goto guard_required; + } + break; + } + + case _BINARY_OP_MULTIPLY_FLOAT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicExpression *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && 
is_const(__right_)){ + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval * + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYFLOAT_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_FLOAT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicExpression *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)){ + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval + + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYFLOAT_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicExpression *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)){ + PyObject *right; + PyObject *left; + PyObject 
*res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval - + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYFLOAT_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_UNICODE: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyUnicode_CheckExact(left)) goto error; + if (!PyUnicode_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYUNICODE_TYPE, (uint32_t)0)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYUNICODE_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYUNICODE_TYPE, (uint32_t)0); + goto guard_required; + } + break; + } + + case _BINARY_OP_ADD_UNICODE: { + _Py_UOpsSymbolicExpression *__right_; + _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicExpression *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)){ + PyObject *right; + PyObject *left; + PyObject *res; + left = 
get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = PyUnicode_Concat(left, right); + _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); + _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); + } + else { + __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYUNICODE_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SLICE: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-3] = __res_; + stack_pointer += -2; + break; + } + + case _STORE_SLICE: { + stack_pointer += -4; + break; + } + + case _BINARY_SUBSCR_LIST_INT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_STR_INT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_TUPLE_INT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_DICT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + /* _BINARY_SUBSCR_GETITEM is not a viable 
micro-op for tier 2 */ + + case _LIST_APPEND: { + stack_pointer += -1; + break; + } + + case _SET_ADD: { + stack_pointer += -1; + break; + } + + case _STORE_SUBSCR: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_LIST_INT: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_DICT: { + stack_pointer += -3; + break; + } + + case _DELETE_SUBSCR: { + stack_pointer += -2; + break; + } + + case _CALL_INTRINSIC_1: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _CALL_INTRINSIC_2: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + /* _INSTRUMENTED_RETURN_VALUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_RETURN_CONST is not a viable micro-op for tier 2 */ + + case _GET_AITER: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = sym_init_unknown(ctx); + if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + case _GET_ANEXT: { + _Py_UOpsSymbolicExpression *__awaitable_; + __awaitable_ = sym_init_unknown(ctx); + if(__awaitable_ == NULL) goto error; + stack_pointer[0] = __awaitable_; + stack_pointer += 1; + break; + } + + case _GET_AWAITABLE: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = sym_init_unknown(ctx); + if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + /* _SEND is not a viable micro-op for tier 2 */ + + /* _SEND_GEN is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_YIELD_VALUE is not a viable micro-op for tier 2 */ + + case _POP_EXCEPT: { + stack_pointer += -1; + break; + } + + case _LOAD_ASSERTION_ERROR: { + _Py_UOpsSymbolicExpression *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[0] = __value_; + stack_pointer += 1; + break; + } + + case _LOAD_BUILD_CLASS: { + 
_Py_UOpsSymbolicExpression *__bc_; + __bc_ = sym_init_unknown(ctx); + if(__bc_ == NULL) goto error; + stack_pointer[0] = __bc_; + stack_pointer += 1; + break; + } + + case _STORE_NAME: { + stack_pointer += -1; + break; + } + + case _DELETE_NAME: { + break; + } + + case _UNPACK_SEQUENCE: { + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TWO_TUPLE: { + PyObject **__values_; + __values_ = &stack_pointer[-1]; + for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + __values_[case_gen_i] = sym_init_unknown(ctx); + if(__values_[case_gen_i] == NULL) goto error; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TUPLE: { + PyObject **__values_; + __values_ = &stack_pointer[-1]; + for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + __values_[case_gen_i] = sym_init_unknown(ctx); + if(__values_[case_gen_i] == NULL) goto error; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_LIST: { + PyObject **__values_; + __values_ = &stack_pointer[-1]; + for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + __values_[case_gen_i] = sym_init_unknown(ctx); + if(__values_[case_gen_i] == NULL) goto error; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_EX: { + stack_pointer += (oparg >> 8) + (oparg & 0xFF); + break; + } + + case _STORE_ATTR: { + stack_pointer += -2; + break; + } + + case _DELETE_ATTR: { + stack_pointer += -1; + break; + } + + case _STORE_GLOBAL: { + stack_pointer += -1; + break; + } + + case _DELETE_GLOBAL: { + break; + } + + case _LOAD_LOCALS: { + _Py_UOpsSymbolicExpression *__locals_; + __locals_ = sym_init_unknown(ctx); + if(__locals_ == NULL) goto error; + stack_pointer[0] = __locals_; + stack_pointer += 1; + break; + } + + case _LOAD_FROM_DICT_OR_GLOBALS: { + _Py_UOpsSymbolicExpression *__v_; + __v_ = sym_init_unknown(ctx); + if(__v_ == NULL) goto error; + stack_pointer[-1] = __v_; + break; + } + + case _LOAD_NAME: { + _Py_UOpsSymbolicExpression *__v_; + __v_ 
= sym_init_unknown(ctx); + if(__v_ == NULL) goto error; + stack_pointer[0] = __v_; + stack_pointer += 1; + break; + } + + case _LOAD_GLOBAL: { + _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + __null_ = sym_init_unknown(ctx); + if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[0] = __res_; + if (oparg & 1) stack_pointer[1] = __null_; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _GUARD_GLOBALS_VERSION: { + break; + } + + case _GUARD_BUILTINS_VERSION: { + break; + } + + case _LOAD_GLOBAL_MODULE: { + _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + __null_ = sym_init_unknown(ctx); + if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[0] = __res_; + if (oparg & 1) stack_pointer[1] = __null_; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + __null_ = sym_init_unknown(ctx); + if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[0] = __res_; + if (oparg & 1) stack_pointer[1] = __null_; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _DELETE_FAST: { + break; + } + + case _MAKE_CELL: { + break; + } + + case _DELETE_DEREF: { + break; + } + + case _LOAD_FROM_DICT_OR_DEREF: { + _Py_UOpsSymbolicExpression *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[-1] = __value_; + break; + } + + case _LOAD_DEREF: { + _Py_UOpsSymbolicExpression *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[0] = __value_; + stack_pointer += 1; + break; + } + + case _STORE_DEREF: { + stack_pointer 
+= -1; + break; + } + + case _COPY_FREE_VARS: { + break; + } + + case _BUILD_STRING: { + _Py_UOpsSymbolicExpression *__str_; + __str_ = sym_init_unknown(ctx); + if(__str_ == NULL) goto error; + stack_pointer[-oparg] = __str_; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_TUPLE: { + _Py_UOpsSymbolicExpression *__tup_; + __tup_ = sym_init_unknown(ctx); + if(__tup_ == NULL) goto error; + stack_pointer[-oparg] = __tup_; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_LIST: { + _Py_UOpsSymbolicExpression *__list_; + __list_ = sym_init_unknown(ctx); + if(__list_ == NULL) goto error; + stack_pointer[-oparg] = __list_; + stack_pointer += 1 - oparg; + break; + } + + case _LIST_EXTEND: { + stack_pointer += -1; + break; + } + + case _SET_UPDATE: { + stack_pointer += -1; + break; + } + + case _BUILD_SET: { + _Py_UOpsSymbolicExpression *__set_; + __set_ = sym_init_unknown(ctx); + if(__set_ == NULL) goto error; + stack_pointer[-oparg] = __set_; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_MAP: { + _Py_UOpsSymbolicExpression *__map_; + __map_ = sym_init_unknown(ctx); + if(__map_ == NULL) goto error; + stack_pointer[-oparg*2] = __map_; + stack_pointer += 1 - oparg*2; + break; + } + + case _SETUP_ANNOTATIONS: { + break; + } + + case _BUILD_CONST_KEY_MAP: { + _Py_UOpsSymbolicExpression *__map_; + __map_ = sym_init_unknown(ctx); + if(__map_ == NULL) goto error; + stack_pointer[-1 - oparg] = __map_; + stack_pointer += -oparg; + break; + } + + case _DICT_UPDATE: { + stack_pointer += -1; + break; + } + + case _DICT_MERGE: { + stack_pointer += -1; + break; + } + + case _MAP_ADD: { + stack_pointer += -2; + break; + } + + /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ + + case _LOAD_SUPER_ATTR_ATTR: { + _Py_UOpsSymbolicExpression *__attr_; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-3] = __attr_; + stack_pointer += -2 + ((0) ? 
1 : 0); + break; + } + + case _LOAD_SUPER_ATTR_METHOD: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__self_or_null_; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_or_null_ = sym_init_unknown(ctx); + if(__self_or_null_ == NULL) goto error; + stack_pointer[-3] = __attr_; + stack_pointer[-2] = __self_or_null_; + stack_pointer += -1; + break; + } + + case _LOAD_ATTR: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__self_or_null_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_or_null_ = sym_init_unknown(ctx); + if(__self_or_null_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __self_or_null_; + stack_pointer += (oparg & 1); + break; + } + + case _GUARD_TYPE_VERSION: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + PyTypeObject *tp = Py_TYPE(owner); + assert(type_version != 0); + if (tp->tp_version_tag != type_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version); + goto guard_required; + } + break; + } + + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + goto guard_required; + break; + } + + case _LOAD_ATTR_INSTANCE_VALUE: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __null_ = 
sym_init_unknown(ctx); + if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_MODULE: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + if (!PyModule_CheckExact(owner)) goto error; + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; + assert(dict != NULL); + if (dict->ma_keys->dk_version != type_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _LOAD_ATTR_MODULE: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __null_ = sym_init_unknown(ctx); + if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_WITH_HINT: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); + if (_PyDictOrValues_IsValues(dorv)) goto error; + PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv); + if (dict == NULL) goto error; + assert(PyDict_CheckExact((PyObject *)dict)); + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _LOAD_ATTR_WITH_HINT: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __null_ = sym_init_unknown(ctx); + 
if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _LOAD_ATTR_SLOT: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __null_ = sym_init_unknown(ctx); + if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_CLASS: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + if (!PyType_Check(owner)) goto error; + assert(type_version != 0); + if (((PyTypeObject *)owner)->tp_version_tag != type_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _LOAD_ATTR_CLASS: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__null_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __null_ = sym_init_unknown(ctx); + if(__null_ == NULL) goto error; + sym_set_type(__null_, NULL_TYPE, 0); + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + /* _LOAD_ATTR_PROPERTY is not a viable micro-op for tier 2 */ + + /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */ + + case _GUARD_DORV_VALUES: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); + if 
(!_PyDictOrValues_IsValues(dorv)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0); + goto guard_required; + } + break; + } + + case _STORE_ATTR_INSTANCE_VALUE: { + stack_pointer += -2; + break; + } + + /* _STORE_ATTR_WITH_HINT is not a viable micro-op for tier 2 */ + + case _STORE_ATTR_SLOT: { + stack_pointer += -2; + break; + } + + case _COMPARE_OP: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_FLOAT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_INT: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_STR: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _IS_OP: { + _Py_UOpsSymbolicExpression *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-2] = __b_; + stack_pointer += -1; + break; + } + + case _CONTAINS_OP: { + _Py_UOpsSymbolicExpression *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-2] = __b_; + stack_pointer += -1; + break; + } + + case _CHECK_EG_MATCH: { + _Py_UOpsSymbolicExpression *__rest_; + _Py_UOpsSymbolicExpression *__match_; + __rest_ = sym_init_unknown(ctx); + if(__rest_ == NULL) 
goto error; + __match_ = sym_init_unknown(ctx); + if(__match_ == NULL) goto error; + stack_pointer[-2] = __rest_; + stack_pointer[-1] = __match_; + break; + } + + case _CHECK_EXC_MATCH: { + _Py_UOpsSymbolicExpression *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-1] = __b_; + break; + } + + /* _JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + case _IS_NONE: { + _Py_UOpsSymbolicExpression *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-1] = __b_; + break; + } + + case _GET_LEN: { + _Py_UOpsSymbolicExpression *__len_o_; + __len_o_ = sym_init_unknown(ctx); + if(__len_o_ == NULL) goto error; + stack_pointer[0] = __len_o_; + stack_pointer += 1; + break; + } + + case _MATCH_CLASS: { + _Py_UOpsSymbolicExpression *__attrs_; + __attrs_ = sym_init_unknown(ctx); + if(__attrs_ == NULL) goto error; + stack_pointer[-3] = __attrs_; + stack_pointer += -2; + break; + } + + case _MATCH_MAPPING: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _MATCH_SEQUENCE: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _MATCH_KEYS: { + _Py_UOpsSymbolicExpression *__values_or_none_; + __values_or_none_ = sym_init_unknown(ctx); + if(__values_or_none_ == NULL) goto error; + stack_pointer[0] = __values_or_none_; + stack_pointer += 1; + break; + } + + case _GET_ITER: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = sym_init_unknown(ctx); + if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + case _GET_YIELD_FROM_ITER: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = sym_init_unknown(ctx); + 
if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + /* _FOR_ITER is not a viable micro-op for tier 2 */ + + case _FOR_ITER_TIER_TWO: { + _Py_UOpsSymbolicExpression *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 */ + + case _ITER_CHECK_LIST: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + if (Py_TYPE(iter) != &PyListIter_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_LIST: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + PyListObject *seq = it->it_seq; + if (seq == NULL) goto error; + if (it->it_index >= PyList_GET_SIZE(seq)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _ITER_NEXT_LIST: { + _Py_UOpsSymbolicExpression *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_TUPLE: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + if (Py_TYPE(iter) != &PyTupleIter_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_TUPLE: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = stack_pointer[-1]; + // Constant 
evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + PyTupleObject *seq = it->it_seq; + if (seq == NULL) goto error; + if (it->it_index >= PyTuple_GET_SIZE(seq)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _ITER_NEXT_TUPLE: { + _Py_UOpsSymbolicExpression *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_RANGE: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; + if (Py_TYPE(r) != &PyRangeIter_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + /* _ITER_JUMP_RANGE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_RANGE: { + _Py_UOpsSymbolicExpression *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; + assert(Py_TYPE(r) == &PyRangeIter_Type); + if (r->len <= 0) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _ITER_NEXT_RANGE: { + _Py_UOpsSymbolicExpression *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ + + case _BEFORE_ASYNC_WITH: { + _Py_UOpsSymbolicExpression *__exit_; + _Py_UOpsSymbolicExpression *__res_; + __exit_ = sym_init_unknown(ctx); + if(__exit_ == NULL) goto error; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __exit_; + stack_pointer[0] = __res_; + 
stack_pointer += 1; + break; + } + + case _BEFORE_WITH: { + _Py_UOpsSymbolicExpression *__exit_; + _Py_UOpsSymbolicExpression *__res_; + __exit_ = sym_init_unknown(ctx); + if(__exit_ == NULL) goto error; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __exit_; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _WITH_EXCEPT_START: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _PUSH_EXC_INFO: { + _Py_UOpsSymbolicExpression *__prev_exc_; + _Py_UOpsSymbolicExpression *__new_exc_; + __prev_exc_ = sym_init_unknown(ctx); + if(__prev_exc_ == NULL) goto error; + __new_exc_ = sym_init_unknown(ctx); + if(__new_exc_ == NULL) goto error; + stack_pointer[-1] = __prev_exc_; + stack_pointer[0] = __new_exc_; + stack_pointer += 1; + break; + } + + case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + if (!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0); + goto guard_required; + } + break; + } + + case _GUARD_KEYS_VERSION: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + uint32_t keys_version = 
(uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + PyTypeObject *owner_cls = Py_TYPE(owner); + PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; + if (owner_heap_type->ht_cached_keys->dk_version != keys_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version); + goto guard_required; + } + break; + } + + case _LOAD_ATTR_METHOD_WITH_VALUES: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__self_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (1) stack_pointer[0] = __self_; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_METHOD_NO_DICT: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__self_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (1) stack_pointer[0] = __self_; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + _Py_UOpsSymbolicExpression *__attr_; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + stack_pointer += ((0) ? 1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + _Py_UOpsSymbolicExpression *__attr_; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + stack_pointer += ((0) ? 
1 : 0); + break; + } + + case _CHECK_ATTR_METHOD_LAZY_DICT: { + _Py_UOpsSymbolicExpression *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + Py_ssize_t dictoffset = Py_TYPE(owner)->tp_dictoffset; + assert(dictoffset > 0); + PyObject *dict = *(PyObject **)((char *)owner + dictoffset); + /* This object has a __dict__, just not yet created */ + if (dict != NULL) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + break; + } + + case _LOAD_ATTR_METHOD_LAZY_DICT: { + _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicExpression *__self_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (1) stack_pointer[0] = __self_; + stack_pointer += ((1) ? 1 : 0); + break; + } + + /* _INSTRUMENTED_CALL is not a viable micro-op for tier 2 */ + + /* _CALL is not a viable micro-op for tier 2 */ + + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymbolicExpression *__null_; + _Py_UOpsSymbolicExpression *__callable_; + __null_ = stack_pointer[-1 - oparg]; + __callable_ = stack_pointer[-2 - oparg]; + // Constant evaluation + if (is_const(__callable_) && is_const(__null_)) { + PyObject *null; + PyObject *callable; + callable = get_const(__callable_); + null = get_const(__null_); + if (null != NULL) goto error; + if (Py_TYPE(callable) != &PyMethod_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__null_, NULL_TYPE, (uint32_t)0)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__callable_, PYMETHOD_TYPE, (uint32_t)0); + 
sym_set_type((_Py_UOpsSymbolicExpression *)__null_, NULL_TYPE, (uint32_t)0); + goto guard_required; + } + break; + } + + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymbolicExpression *__func_; + _Py_UOpsSymbolicExpression *__self_; + __func_ = sym_init_unknown(ctx); + if(__func_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-2 - oparg] = __func_; + stack_pointer[-1 - oparg] = __self_; + break; + } + + case _CHECK_PEP_523: { + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS: { + _Py_UOpsSymbolicExpression *__self_or_null_; + _Py_UOpsSymbolicExpression *__callable_; + __self_or_null_ = stack_pointer[-1 - oparg]; + __callable_ = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__callable_) && is_const(__self_or_null_)) { + PyObject *self_or_null; + PyObject *callable; + callable = get_const(__callable_); + self_or_null = get_const(__self_or_null_); + if (!PyFunction_Check(callable)) goto error; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto error; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymbolicExpression *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)){ + DPRINTF(2, "type propagation eliminated guard\n"); + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymbolicExpression *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version); + goto guard_required; + } + break; + } + + case _CHECK_STACK_SPACE: { + _Py_UOpsSymbolicExpression *__callable_; + __callable_ = stack_pointer[-2 - oparg]; + goto guard_required; + break; + } + + case _INIT_CALL_PY_EXACT_ARGS: { + _Py_UOpsSymbolicExpression **__args_; + 
_Py_UOpsSymbolicExpression *__self_or_null_; + _Py_UOpsSymbolicExpression *__callable_; + _PyInterpreterFrame *__new_frame_; + __args_ = &stack_pointer[-oparg]; + __self_or_null_ = stack_pointer[-1 - oparg]; + __callable_ = stack_pointer[-2 - oparg]; + __new_frame_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, oparg, __args_, 2 , __callable_, __self_or_null_); + stack_pointer[-2 - oparg] = (PyObject *)__new_frame_; + stack_pointer += -1 - oparg; + break; + } + + /* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 */ + + case _CALL_TYPE_1: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_STR_1: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_TUPLE_1: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + /* _CALL_ALLOC_AND_ENTER_INIT is not a viable micro-op for tier 2 */ + + case _EXIT_INIT_CHECK: { + stack_pointer += -1; + break; + } + + case _CALL_BUILTIN_CLASS: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_O: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_FAST: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case 
_CALL_BUILTIN_FAST_WITH_KEYWORDS: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_LEN: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_ISINSTANCE: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_O: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_NOARGS: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + /* _INSTRUMENTED_CALL_KW is not a viable micro-op for tier 2 */ + + /* _CALL_KW is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + /* _CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + case _MAKE_FUNCTION: { + _Py_UOpsSymbolicExpression *__func_; + __func_ = sym_init_unknown(ctx); + if(__func_ == NULL) goto error; + stack_pointer[-1] = 
__func_; + break; + } + + case _SET_FUNCTION_ATTRIBUTE: { + _Py_UOpsSymbolicExpression *__func_; + __func_ = sym_init_unknown(ctx); + if(__func_ == NULL) goto error; + stack_pointer[-2] = __func_; + stack_pointer += -1; + break; + } + + case _BUILD_SLICE: { + _Py_UOpsSymbolicExpression *__slice_; + __slice_ = sym_init_unknown(ctx); + if(__slice_ == NULL) goto error; + stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = __slice_; + stack_pointer += -1 - ((oparg == 3) ? 1 : 0); + break; + } + + case _CONVERT_VALUE: { + _Py_UOpsSymbolicExpression *__result_; + __result_ = sym_init_unknown(ctx); + if(__result_ == NULL) goto error; + stack_pointer[-1] = __result_; + break; + } + + case _FORMAT_SIMPLE: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _FORMAT_WITH_SPEC: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP: { + _Py_UOpsSymbolicExpression *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + /* _INSTRUMENTED_INSTRUCTION is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_FORWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NONE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ + + case _GUARD_IS_TRUE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_FALSE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_NONE_POP: { + stack_pointer += -1; + break; + } + + case 
_GUARD_IS_NOT_NONE_POP: { + stack_pointer += -1; + break; + } + + case _JUMP_TO_TOP: { + break; + } + + case _SET_IP: { + break; + } + + case _SAVE_RETURN_OFFSET: { + break; + } + + case _EXIT_TRACE: { + break; + } + + case _CHECK_VALIDITY: { + break; + } + + case _LOAD_CONST_INLINE_BORROW: { + _Py_UOpsSymbolicExpression *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[0] = __value_; + stack_pointer += 1; + break; + } + + case _INTERNAL_INCREMENT_OPT_COUNTER: { + stack_pointer += -1; + break; + } + +#undef TIER_TWO diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index d2fac694a7fd05..d167d9ef77e74d 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -44,21 +44,22 @@ "COPY", "POP_TOP", "PUSH_NULL", - "END_SEND", "SWAP", # Frame stuff "_PUSH_FRAME", "_POP_FRAME", "_SHRINK_STACK", +} - - # Shouldn't appear in abstract interpreter - "_LOAD_FAST_NO_INCREF", - "_LOAD_CONST_IMMEDIATE", - "_SWAP_AND_POP", - "_STORE_COMMON", - "_LOAD_COMMON", +NO_CONST_OR_TYPE_EVALUATE = { + "_RESUME_CHECK", + "_GUARD_GLOBALS_VERSION", + "_GUARD_BUILTINS_VERSION", + "_CHECK_MANAGED_OBJECT_HAS_VALUES", + "_CHECK_PEP_523", + "_CHECK_STACK_SPACE", + "_INIT_CALL_PY_EXACT_ARGS", } def declare_variables( @@ -182,7 +183,7 @@ def _write_body_abstract_interp_pure_uop( assert len(uop.stack.outputs) == 1, f"Currently we only support 1 stack output for pure ops: {uop}" # uop is mandatory - we cannot const evaluate it - if uop.properties.mandatory: + if uop.name in NO_CONST_OR_TYPE_EVALUATE: out.emit(f"{mangled_uop.stack.outputs[0].name} = {new_sym(None, arr_var_size, arr_var_name, len(mangled_uop.stack.inputs), subexpressions)}") return @@ -218,7 +219,7 @@ def _write_body_abstract_interp_pure_uop( out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) goto error;\n") # Perform type propagation - if (typ := 
uop.stack.outputs[0].typeprop) is not None: + if (typ := uop.stack.outputs[0].type_prop) is not None: typname, aux = typ aux = "0" if aux is None else aux out.emit("// Type propagation\n") @@ -231,7 +232,7 @@ def _write_body_abstract_interp_guard_uop( ) -> None: # 1. Attempt to perform guard elimination # 2. Type propagate for guard success - if uop.properties.mandatory: + if uop.name in NO_CONST_OR_TYPE_EVALUATE: out.emit("goto guard_required;") return @@ -263,7 +264,7 @@ def _write_body_abstract_interp_guard_uop( out.emit("}\n") # Does the input specify typed inputs? - if not any(output_var.typeprop for output_var in mangled_uop.stack.outputs): + if not any(output_var.type_prop for output_var in mangled_uop.stack.outputs): return # If the input types already match, eliminate the guard # Read the cache information to check the auxiliary type information @@ -276,7 +277,7 @@ def _write_body_abstract_interp_guard_uop( for output_var in mangled_uop.stack.outputs: if output_var.name in UNUSED: continue - if (typ := output_var.typeprop) is not None: + if (typ := output_var.type_prop) is not None: typname, aux = typ aux = "0" if aux is None else aux # Check that the input type information match (including auxiliary info) From 9d85c354483b606be8ddf6aea8c966bb55cc1307 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 14 Jan 2024 07:32:34 +0800 Subject: [PATCH 003/111] cleanup --- Tools/cases_generator/tier2_abstract_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index d167d9ef77e74d..709cad7163261f 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -45,6 +45,7 @@ "POP_TOP", "PUSH_NULL", "SWAP", + "END_SEND", # Frame stuff "_PUSH_FRAME", @@ -246,8 +247,6 @@ def _write_body_abstract_interp_guard_uop( 
out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") out.emit("// Constant evaluation \n") - # TODO if we encode all type information of constants, then we shouldn't even need - # this part, and we can just do a type check. predicates_str = " && ".join([f"is_const({var.name})" for var in mangled_uop.stack.inputs if var.name not in UNUSED]) if predicates_str: out.emit(f"if ({predicates_str}) {{\n") @@ -311,6 +310,7 @@ def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) - for var in reversed(mangled_uop.stack.inputs): is_impure = not mangled_uop.properties.pure and not mangled_uop.properties.guard old_var_name = var.name + # code smell, but basically impure ops don't use any of their inputs if is_impure: var.name = "unused" out.emit(stack.pop(var)) From 76cee0c160b04c77b7227d29cde11fe79d71ad23 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 14 Jan 2024 07:35:37 +0800 Subject: [PATCH 004/111] run black --- .../tier2_abstract_generator.py | 128 +++++++++++------- 1 file changed, 80 insertions(+), 48 deletions(-) diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 709cad7163261f..ee41d6297ab145 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -46,7 +46,6 @@ "PUSH_NULL", "SWAP", "END_SEND", - # Frame stuff "_PUSH_FRAME", "_POP_FRAME", @@ -63,12 +62,13 @@ "_INIT_CALL_PY_EXACT_ARGS", } + def declare_variables( - uop: Uop, - out: CWriter, - default_type: str = "_Py_UOpsSymbolicExpression *", - skip_inputs: bool = False, - skip_peeks: bool = False, + uop: Uop, + out: CWriter, + default_type: str = "_Py_UOpsSymbolicExpression *", + skip_inputs: bool = False, + skip_peeks: bool = False, ) -> None: variables = set(UNUSED) if not skip_inputs: @@ -77,7 +77,7 @@ def declare_variables( continue if var.name not in variables: type = var.type if var.type 
else default_type - if var.size > '1' and type == "PyObject **": + if var.size > "1" and type == "PyObject **": type = "_Py_UOpsSymbolicExpression **" variables.add(var.name) if var.condition: @@ -97,12 +97,12 @@ def declare_variables( def tier2_replace_deopt( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - unused: Stack, - inst: Instruction | None, + out: CWriter, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, ) -> None: out.emit_at("if ", tkn) out.emit(next(tkn_iter)) @@ -117,22 +117,23 @@ def tier2_replace_deopt( def _write_body_abstract_interp_impure_uop( - mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack ) -> None: - # Simply make all outputs effects unknown for var in mangled_uop.stack.outputs: if var.name in UNUSED or var.peek: continue - if var.size == '1': + if var.size == "1": out.emit(f"{var.name} = sym_init_unknown(ctx);\n") out.emit(f"if({var.name} == NULL) goto error;\n") if var.name in ("null", "__null_"): out.emit(f"sym_set_type({var.name}, NULL_TYPE, 0);\n") else: - out.emit(f"for (int case_gen_i = 0; case_gen_i < {var.size}; case_gen_i++) {{\n") + out.emit( + f"for (int case_gen_i = 0; case_gen_i < {var.size}; case_gen_i++) {{\n" + ) out.emit(f"{var.name}[case_gen_i] = sym_init_unknown(ctx);\n") out.emit(f"if({var.name}[case_gen_i] == NULL) goto error;\n") out.emit("}\n") @@ -141,16 +142,21 @@ def _write_body_abstract_interp_impure_uop( def mangle_uop_names(uop: Uop) -> Uop: uop = dataclasses.replace(uop) new_stack = dataclasses.replace(uop.stack) - new_stack.inputs = [dataclasses.replace(var, name=f"__{var.name}_") for var in uop.stack.inputs] - new_stack.outputs = [dataclasses.replace(var, name=f"__{var.name}_") for var in uop.stack.outputs] + new_stack.inputs = [ + dataclasses.replace(var, name=f"__{var.name}_") for var in uop.stack.inputs + ] + new_stack.outputs = [ + dataclasses.replace(var, 
name=f"__{var.name}_") for var in uop.stack.outputs + ] uop.stack = new_stack return uop + # Returns a tuple of a pointer to an array of subexpressions, the length of said array # and a string containing the join of all other subexpressions obtained from stack input. # This grabs variadic inputs that depend on things like oparg or cache def get_subexpressions(input_vars: list[StackItem]) -> tuple[str, int, str]: - arr_var = [(var.name, var) for var in input_vars if var.size > '1'] + arr_var = [(var.name, var) for var in input_vars if var.size > "1"] assert len(arr_var) <= 1, "Can have at most one array input from oparg/cache" arr_var_name = arr_var[0][0] if len(arr_var) == 1 else None arr_var_size = (arr_var[0][1].size or 0) if arr_var_name is not None else 0 @@ -161,12 +167,13 @@ def get_subexpressions(input_vars: list[StackItem]) -> tuple[str, int, str]: var = ", " + var return arr_var_name, arr_var_size, var + def new_sym( - constant: str | None, - arr_var_size: int | str | None, - arr_var_name: str | None, - subexpresion_count: int | str, - subexpressions: str + constant: str | None, + arr_var_size: int | str | None, + arr_var_name: str | None, + subexpresion_count: int | str, + subexpressions: str, ) -> str: return ( f"_Py_UOpsSymbolicExpression_New(" @@ -177,24 +184,35 @@ def new_sym( def _write_body_abstract_interp_pure_uop( - mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack ) -> None: + arr_var_name, arr_var_size, subexpressions = get_subexpressions( + mangled_uop.stack.inputs + ) - arr_var_name, arr_var_size, subexpressions = get_subexpressions(mangled_uop.stack.inputs) - - assert len(uop.stack.outputs) == 1, f"Currently we only support 1 stack output for pure ops: {uop}" + assert ( + len(uop.stack.outputs) == 1 + ), f"Currently we only support 1 stack output for pure ops: {uop}" # uop is mandatory - we cannot const evaluate it + sym = new_sym( + None, arr_var_size, arr_var_name, 
len(mangled_uop.stack.inputs), subexpressions + ) if uop.name in NO_CONST_OR_TYPE_EVALUATE: - out.emit(f"{mangled_uop.stack.outputs[0].name} = {new_sym(None, arr_var_size, arr_var_name, len(mangled_uop.stack.inputs), subexpressions)}") + out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}") return - # Constant prop only handles one output, and no variadic inputs. # Perhaps in the future we can support these. - if all(input.size == '1' for input in uop.stack.inputs): + if all(input.size == "1" for input in uop.stack.inputs): # We can try a constant evaluation out.emit("// Constant evaluation\n") - predicates = " && ".join([f"is_const({var.name})" for var in mangled_uop.stack.inputs if var.name not in UNUSED]) + predicates = " && ".join( + [ + f"is_const({var.name})" + for var in mangled_uop.stack.inputs + if var.name not in UNUSED + ] + ) out.emit(f"if ({predicates or 0}){{\n") declare_variables(uop, out, default_type="PyObject *") @@ -202,19 +220,22 @@ def _write_body_abstract_interp_pure_uop( out.emit(f"{var.name} = get_const({mangled_var.name});\n") emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) out.emit("\n") - maybe_const_val = new_sym(f'(PyObject *){uop.stack.outputs[0].name}', None, None, - len(mangled_uop.stack.inputs), subexpressions) + maybe_const_val = new_sym( + f"(PyObject *){uop.stack.outputs[0].name}", + None, + None, + len(mangled_uop.stack.inputs), + subexpressions, + ) out.emit(f"{mangled_uop.stack.outputs[0].name} = {maybe_const_val}\n") out.emit("}\n") out.emit("else {\n") - sym = new_sym(None, None, None, - len(mangled_uop.stack.inputs), subexpressions) + sym = new_sym(None, None, None, len(mangled_uop.stack.inputs), subexpressions) out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") out.emit("}\n") else: - sym = new_sym(None, None, None, - len(mangled_uop.stack.inputs), subexpressions) + sym = new_sym(None, None, None, len(mangled_uop.stack.inputs), subexpressions) out.emit(f"{mangled_uop.stack.outputs[0].name} = 
{sym}\n") out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) goto error;\n") @@ -228,8 +249,9 @@ def _write_body_abstract_interp_pure_uop( f"sym_set_type({mangled_uop.stack.outputs[0].name}, {typname}, (uint32_t){aux});" ) + def _write_body_abstract_interp_guard_uop( - mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack ) -> None: # 1. Attempt to perform guard elimination # 2. Type propagate for guard success @@ -247,7 +269,13 @@ def _write_body_abstract_interp_guard_uop( out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") out.emit("// Constant evaluation \n") - predicates_str = " && ".join([f"is_const({var.name})" for var in mangled_uop.stack.inputs if var.name not in UNUSED]) + predicates_str = " && ".join( + [ + f"is_const({var.name})" + for var in mangled_uop.stack.inputs + if var.name not in UNUSED + ] + ) if predicates_str: out.emit(f"if ({predicates_str}) {{\n") declare_variables(uop, out, default_type="PyObject *") @@ -270,9 +298,13 @@ def _write_body_abstract_interp_guard_uop( predicates = [] propagates = [] - assert len(mangled_uop.stack.outputs) == len(mangled_uop.stack.inputs), "guards must have same number of args" - assert [output == input_ for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs)], \ - "guards must forward their stack values" + assert len(mangled_uop.stack.outputs) == len( + mangled_uop.stack.inputs + ), "guards must have same number of args" + assert [ + output == input_ + for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs) + ], "guards must forward their stack values" for output_var in mangled_uop.stack.outputs: if output_var.name in UNUSED: continue @@ -302,13 +334,13 @@ def _write_body_abstract_interp_guard_uop( out.emit("}\n") - - def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) -> None: try: out.start_line() for var in reversed(mangled_uop.stack.inputs): - is_impure = not 
mangled_uop.properties.pure and not mangled_uop.properties.guard + is_impure = ( + not mangled_uop.properties.pure and not mangled_uop.properties.guard + ) old_var_name = var.name # code smell, but basically impure ops don't use any of their inputs if is_impure: @@ -333,7 +365,7 @@ def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) - def generate_tier2_abstract( - filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool + filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool ) -> None: write_header(__file__, filenames, outfile) outfile.write( From b71aa066ba9403bafd8aea792b37cadc88958f7e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 14 Jan 2024 10:55:22 +0800 Subject: [PATCH 005/111] the optimizer --- Include/internal/pycore_opcode_metadata.h | 2 + Include/internal/pycore_uop_ids.h | 5 +- Include/internal/pycore_uop_metadata.h | 28 +- Lib/test/test_capi/test_opt.py | 161 ++ Python/abstract_interp_cases.c.h | 40 + Python/bytecodes.c | 12 + Python/executor_cases.c.h | 20 + Python/optimizer.c | 9 +- Python/optimizer_analysis.c | 1582 ++++++++++++++++- Tools/cases_generator/analyzer.py | 9 +- Tools/cases_generator/generators_common.py | 2 + .../opcode_metadata_generator.py | 1 + .../tier2_abstract_generator.py | 4 +- 13 files changed, 1851 insertions(+), 24 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index fbb448f663369a..1226f75afc8d4f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -911,6 +911,7 @@ enum InstructionFormat { #define HAS_ESCAPES_FLAG (512) #define HAS_PURE_FLAG (1024) #define HAS_PASSTHROUGH_FLAG (2048) +#define HAS_SPECIAL_OPT_FLAG (4096) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define 
OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -923,6 +924,7 @@ enum InstructionFormat { #define OPCODE_HAS_ESCAPES(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ESCAPES_FLAG)) #define OPCODE_HAS_PURE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PURE_FLAG)) #define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) +#define OPCODE_HAS_SPECIAL_OPT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_SPECIAL_OPT_FLAG)) #define OPARG_FULL 0 #define OPARG_CACHE_1 1 diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8ee90d79a13c2f..8c32fd921b387c 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -232,7 +232,10 @@ extern "C" { #define _CHECK_VALIDITY 379 #define _LOAD_CONST_INLINE_BORROW 380 #define _INTERNAL_INCREMENT_OPT_COUNTER 381 -#define MAX_UOP_ID 381 +#define _LOAD_CONST_INLINE 382 +#define INIT_FAST 383 +#define _SHRINK_STACK 384 +#define MAX_UOP_ID 384 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3b251d3814b1da..09379f361fdf4f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -18,16 +18,16 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_NOP] = 0, [_RESUME_CHECK] = HAS_DEOPT_FLAG, - [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG, - [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | 
HAS_PURE_FLAG, - [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_SPECIAL_OPT_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_POP_TOP] = HAS_PURE_FLAG, - [_PUSH_NULL] = HAS_PURE_FLAG, + [_POP_TOP] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_PUSH_NULL] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_END_SEND] = HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNARY_NOT] = HAS_PURE_FLAG, @@ -64,7 +64,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_POP_FRAME] = HAS_ESCAPES_FLAG, + [_POP_FRAME] = HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_GET_AITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_ANEXT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_AWAITABLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -169,7 +169,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_PUSH_FRAME] = 0, + [_PUSH_FRAME] = HAS_SPECIAL_OPT_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -190,9 +190,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | 
HAS_ESCAPES_FLAG, - [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG, - [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG, @@ -204,9 +204,13 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE_BORROW] = 0, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, + [_LOAD_CONST_INLINE] = 0, + [INIT_FAST] = 0, + [_SHRINK_STACK] = HAS_ARG_FLAG, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { + [INIT_FAST] = "INIT_FAST", [_BEFORE_ASYNC_WITH] = "_BEFORE_ASYNC_WITH", [_BEFORE_WITH] = "_BEFORE_WITH", [_BINARY_OP] = "_BINARY_OP", @@ -329,6 +333,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", [_LOAD_CONST] = "_LOAD_CONST", + [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", @@ -365,6 +370,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE", [_SET_IP] = "_SET_IP", [_SET_UPDATE] = "_SET_UPDATE", + [_SHRINK_STACK] = "_SHRINK_STACK", [_STORE_ATTR] = "_STORE_ATTR", [_STORE_ATTR_INSTANCE_VALUE] = "_STORE_ATTR_INSTANCE_VALUE", [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 5c8c0596610303..685cca8d86e38c 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -540,6 +540,167 @@ def testfunc(n): # too much already. 
self.assertEqual(count, 1) +class TestUopsOptimization(unittest.TestCase): + + def test_int_constant_propagation(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + + def test_int_type_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = x + 1 + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 127) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + + def test_int_impure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + y = 1 + x // 2 + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + + def test_int_impure_region_attr(self): + class A: + foo = 1 + def testfunc(loops): + num = 0 + while num < loops: + x = A.foo + A.foo + y = 1 + A.foo + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + def 
test_int_large_pure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + num - num + num - num + num + num + num - num + num - num + y = 1 + a = x + num + num + num + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 11) + + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + + def test_frame_instance_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + a = A() + def testfunc(n): + for i in range(n): + a.foo() + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + + def test_frame_class_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + def testfunc(n): + a = A() + for i in range(n): + A.foo(a) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_CLASS", uops) if __name__ == "__main__": unittest.main() diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 093099164d6dc4..ebce3e85157824 100644 --- a/Python/abstract_interp_cases.c.h +++ 
b/Python/abstract_interp_cases.c.h @@ -18,6 +18,17 @@ /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ + case _END_SEND: { + _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicExpression *__receiver_; + __value_ = stack_pointer[-1]; + __receiver_ = stack_pointer[-2]; + __value_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __receiver_, __value_); + stack_pointer[-2] = __value_; + stack_pointer += -1; + break; + } + case _UNARY_NEGATIVE: { _Py_UOpsSymbolicExpression *__res_; __res_ = sym_init_unknown(ctx); @@ -67,6 +78,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -968,6 +980,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1001,6 +1014,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1047,6 +1061,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1281,6 +1296,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1302,6 +1318,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1326,6 +1343,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1347,6 +1365,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1372,6 +1391,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1391,6 +1411,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1568,6 +1589,7 @@ DPRINTF(3, "const eliminated guard\n"); break; } + goto guard_required; break; } @@ -1952,4 +1974,22 @@ break; } + case _LOAD_CONST_INLINE: { + _Py_UOpsSymbolicExpression *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[0] = __value_; + stack_pointer += 1; + break; + } + + case INIT_FAST: { + break; + } + + case _SHRINK_STACK: { + stack_pointer += 
-oparg; + break; + } + #undef TIER_TWO diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6df99d6465347f..dd8e2b0269228e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4071,6 +4071,18 @@ dummy_func( exe->count++; } + op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + value = Py_NewRef(ptr); + } + + // Represents a possibly uninitialized value in the abstract interpreter. + op(INIT_FAST, (--)) { + // Nothing, just a sentinel. + } + + op(_SHRINK_STACK, (unused[oparg] --)) { + } + // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6060bebca9afa5..f2ba4fd981a040 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3405,4 +3405,24 @@ break; } + case _LOAD_CONST_INLINE: { + PyObject *value; + PyObject *ptr = (PyObject *)CURRENT_OPERAND(); + value = Py_NewRef(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case INIT_FAST: { + // Nothing, just a sentinel. + break; + } + + case _SHRINK_STACK: { + oparg = CURRENT_OPARG(); + stack_pointer += -oparg; + break; + } + #undef TIER_TWO diff --git a/Python/optimizer.c b/Python/optimizer.c index 236ae266971d48..f4dbb2dcd58ed6 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -802,12 +802,9 @@ uop_optimize( return err; } OPT_STAT_INC(traces_created); - char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); - if (uop_optimize == NULL || *uop_optimize > '0') { - err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); - if (err < 0) { - return -1; - } + err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); + if (err < 0) { + return -1; } _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7db51f0d90a453..95727242c41e98 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1,5 +1,4 @@ #include "Python.h" 
-#include "opcode.h" #include "pycore_interp.h" #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" @@ -7,11 +6,1546 @@ #include "pycore_uop_metadata.h" #include "pycore_long.h" #include "cpython/optimizer.h" +#include "pycore_optimizer.h" +#include "pycore_object.h" +#include "pycore_dict.h" +#include "pycore_function.h" +#include "pycore_uop_metadata.h" +#include "pycore_uop_ids.h" +#include "pycore_range.h" + +#include #include #include #include -#include "pycore_optimizer.h" + +#define MAX_ABSTRACT_INTERP_SIZE 2048 + +#define OVERALLOCATE_FACTOR 2 + +#ifdef Py_DEBUG + static const char *DEBUG_ENV = "PY_OPT_DEBUG"; + #define DPRINTF(level, ...) \ + if (lltrace >= (level)) { printf(__VA_ARGS__); } +#else + #define DPRINTF(level, ...) +#endif + +static inline bool +_PyOpcode_isterminal(uint32_t opcode) +{ + return (opcode == _LOAD_FAST || + opcode == _LOAD_FAST_CHECK || + opcode == _LOAD_FAST_AND_CLEAR || + opcode == INIT_FAST || + opcode == LOAD_CONST || + opcode == CACHE || + opcode == PUSH_NULL); +} + +static inline bool +_PyOpcode_isstackvalue(uint32_t opcode) +{ + return (opcode == CACHE); +} + + +typedef enum { + // Types with refinement info + GUARD_KEYS_VERSION_TYPE = 0, + GUARD_TYPE_VERSION_TYPE = 1, + // You might think this actually needs to encode oparg + // info as well, see _CHECK_FUNCTION_EXACT_ARGS. + // However, since oparg is tied to code object is tied to function version, + // it should be safe if function version matches. 
+ PYFUNCTION_TYPE_VERSION_TYPE = 2, + + // Types without refinement info + PYLONG_TYPE = 3, + PYFLOAT_TYPE = 4, + PYUNICODE_TYPE = 5, + NULL_TYPE = 6, + PYMETHOD_TYPE = 7, + GUARD_DORV_VALUES_TYPE = 8, + GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE = 9, + + INVALID_TYPE = 31, +} _Py_UOpsSymExprTypeEnum; + +#define MAX_TYPE_WITH_REFINEMENT 2 +typedef struct { + // bitmask of types + uint32_t types; + // refinement data for the types + uint64_t refinement[MAX_TYPE_WITH_REFINEMENT + 1]; +} _Py_UOpsSymType; + + +typedef struct _Py_UOpsSymbolicExpression { + Py_ssize_t operand_count; + Py_ssize_t idx; + + // Type of the symbolic expression + _Py_UOpsSymType sym_type; + PyObject *const_val; + // The region where this expression was first created. + // This matters for anything that isn't immutable + int originating_region; + + // The following fields are for codegen. + _PyUOpInstruction inst; + + struct _Py_UOpsSymbolicExpression *operands[1]; +} _Py_UOpsSymbolicExpression; + + +static void +sym_dealloc(PyObject *o) +{ + _Py_UOpsSymbolicExpression *self = (_Py_UOpsSymbolicExpression *)o; + // Note: we are not decerfing the symbolic expressions because we only hold + // a borrowed ref to them. The symexprs are kept alive by the global table. + Py_CLEAR(self->const_val); +} + +typedef enum _Py_UOps_IRStore_IdKind { + TARGET_NONE = -2, + TARGET_UNUSED = -1, + TARGET_LOCAL = 0, +} _Py_UOps_IRStore_IdKind; + +typedef enum _Py_UOps_IRStore_EntryKind { + IR_PLAIN_INST = 0, + IR_SYMBOLIC = 1, + IR_FRAME_PUSH_INFO = 2, + IR_FRAME_POP_INFO = 3, + IR_NOP = 4, +} _Py_UOps_IRStore_EntryKind; + +typedef struct _Py_UOpsOptIREntry { + _Py_UOps_IRStore_EntryKind typ; + union { + // IR_PLAIN_INST + _PyUOpInstruction inst; + // IR_SYMBOLIC + struct { + _Py_UOps_IRStore_IdKind assignment_target; + _Py_UOpsSymbolicExpression *expr; + }; + // IR_FRAME_PUSH_INFO, always precedes a _PUSH_FRAME IR_PLAIN_INST + struct { + // Only used in codegen for bookkeeping. 
+ struct _Py_UOpsOptIREntry *prev_frame_ir; + // Localsplus of this frame. + _Py_UOpsSymbolicExpression **my_virtual_localsplus; + }; + // IR_FRAME_POP_INFO, always prior to a _POP_FRAME IR_PLAIN_INST + // no fields, just a sentinel + }; +} _Py_UOpsOptIREntry; + +typedef struct _Py_UOps_Opt_IR { + PyObject_VAR_HEAD + int curr_write; + _Py_UOpsOptIREntry entries[1]; +} _Py_UOps_Opt_IR; + +static PyTypeObject _Py_UOps_Opt_IR_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "uops SSA IR", + .tp_basicsize = sizeof(_Py_UOps_Opt_IR) - sizeof(_Py_UOpsOptIREntry), + .tp_itemsize = sizeof(_Py_UOpsOptIREntry), + .tp_dealloc = PyObject_Del, + .tp_free = PyObject_Free, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION +}; + +static void +ir_store(_Py_UOps_Opt_IR *ir, _Py_UOpsSymbolicExpression *expr, _Py_UOps_IRStore_IdKind store_fast_idx) +{ + // Don't store stuff we know will never get compiled. + if(_PyOpcode_isstackvalue(expr->inst.opcode) && store_fast_idx == TARGET_NONE) { + return; + } +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + DPRINTF(3, "ir_store: #%d, expr: %s oparg: %d, operand: %p\n", store_fast_idx, + (expr->inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[expr->inst.opcode], + expr->inst.oparg, + (void *)expr->inst.operand); +#endif + _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; + entry->typ = IR_SYMBOLIC; + entry->assignment_target = store_fast_idx; + entry->expr = expr; + ir->curr_write++; +} + +static void +ir_plain_inst(_Py_UOps_Opt_IR *ir, _PyUOpInstruction inst) +{ +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + DPRINTF(3, "ir_inst: opcode: %s oparg: %d, operand: %p\n", + (inst.opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[inst.opcode], + inst.oparg, + (void *)inst.operand); +#endif + _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; + entry->typ = IR_PLAIN_INST; + entry->inst = inst; + ir->curr_write++; +} + +static _Py_UOpsOptIREntry * +ir_frame_push_info(_Py_UOps_Opt_IR *ir) +{ +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + DPRINTF(3, "ir_frame_push_info\n"); +#endif + _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; + entry->typ = IR_FRAME_PUSH_INFO; + entry->my_virtual_localsplus = NULL; + entry->prev_frame_ir = NULL; + ir->curr_write++; + return entry; +} + + +static void +ir_frame_pop_info(_Py_UOps_Opt_IR *ir) +{ +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + DPRINTF(3, "ir_frame_pop_info\n"); +#endif + _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; + entry->typ = IR_FRAME_POP_INFO; + ir->curr_write++; +} + +typedef struct _Py_UOpsAbstractFrame { + PyObject_HEAD + // Strong reference. + struct _Py_UOpsAbstractFrame *prev; + // Borrowed reference. 
+ struct _Py_UOpsAbstractFrame *next; + // Symbolic version of co_consts + int sym_consts_len; + _Py_UOpsSymbolicExpression **sym_consts; + // Max stacklen + int stack_len; + int locals_len; + + _Py_UOpsOptIREntry *frame_ir_entry; + + _Py_UOpsSymbolicExpression **stack_pointer; + _Py_UOpsSymbolicExpression **stack; + _Py_UOpsSymbolicExpression **locals; +} _Py_UOpsAbstractFrame; + +static void +abstractframe_dealloc(_Py_UOpsAbstractFrame *self) +{ + PyMem_Free(self->sym_consts); + Py_XDECREF(self->prev); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyTypeObject _Py_UOpsAbstractFrame_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "uops abstract frame", + .tp_basicsize = sizeof(_Py_UOpsAbstractFrame) , + .tp_itemsize = 0, + .tp_dealloc = (destructor)abstractframe_dealloc, + .tp_free = PyObject_Free, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION +}; + +typedef struct sym_arena { + // Current ID to assign a new (non-duplicate) sym_expr + Py_ssize_t sym_curr_id; + char *curr_available; + char *end; + char *arena; +} sym_arena; + +typedef struct frequent_syms { + _Py_UOpsSymbolicExpression *nulL_sym; +} frequent_syms; + +// Tier 2 types meta interpreter +typedef struct _Py_UOpsAbstractInterpContext { + PyObject_HEAD + // Stores the symbolic for the upcoming new frame that is about to be created. + _Py_UOpsSymbolicExpression *new_frame_sym; + // The current "executing" frame. + _Py_UOpsAbstractFrame *frame; + + int curr_region_id; + _Py_UOps_Opt_IR *ir; + + sym_arena s_arena; + + // The terminating instruction for the trace. Could be _JUMP_TO_TOP or + // _EXIT_TRACE. 
+    _PyUOpInstruction *terminating;
+
+    frequent_syms frequent_syms;
+
+    _Py_UOpsSymbolicExpression **water_level;
+    _Py_UOpsSymbolicExpression **limit;
+    _Py_UOpsSymbolicExpression *localsplus[1];
+} _Py_UOpsAbstractInterpContext;
+
+// Deallocator for the abstract interpreter context.
+// Releases the context's references to the current frame and to the IR,
+// then frees the context object. Either field may be NULL when a partially
+// constructed context is torn down by abstractinterp_context_new().
+// NOTE(review): s_arena.arena is not freed here -- confirm which code owns
+// the arena buffer of a successfully created context.
+static void
+abstractinterp_dealloc(PyObject *o)
+{
+    _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o;
+    Py_XDECREF(self->frame);
+    Py_XDECREF(self->ir);
+    Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+static PyTypeObject _Py_UOpsAbstractInterpContext_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    .tp_name = "uops abstract interpreter's context",
+    .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext) - sizeof(_Py_UOpsSymbolicExpression *),
+    .tp_itemsize = sizeof(_Py_UOpsSymbolicExpression *),
+    .tp_dealloc = (destructor)abstractinterp_dealloc,
+    .tp_free = PyObject_Free,
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION
+};
+
+// Allocate an IR object sized for `entries` entries, with the write cursor
+// at 0. Returns NULL if the allocation fails.
+static inline _Py_UOps_Opt_IR *
+_Py_UOpsSSA_IR_New(int entries)
+{
+    _Py_UOps_Opt_IR *ir = PyObject_NewVar(_Py_UOps_Opt_IR,
+                                          &_Py_UOps_Opt_IR_Type,
+                                          entries);
+    if (ir == NULL) {
+        // Callers test the result for NULL; do not dereference it here.
+        return NULL;
+    }
+    ir->curr_write = 0;
+    return ir;
+}
+
+static inline _Py_UOpsAbstractFrame *
+frame_new(_Py_UOpsAbstractInterpContext *ctx,
+          PyObject *co_consts, int stack_len, int locals_len,
+          int curr_stacklen, _Py_UOpsOptIREntry *frame_ir_entry);
+static inline int
+frame_push(_Py_UOpsAbstractInterpContext *ctx,
+           _Py_UOpsAbstractFrame *frame,
+           _Py_UOpsSymbolicExpression **localsplus_start,
+           int locals_len,
+           int curr_stacklen,
+           int total_len);
+
+static inline int
+frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame,
+                int locals_len, int curr_stacklen);
+
+// Create the abstract interpreter context for code object `co`.
+// Allocates the symbolic-expression arena and the IR (both sized from
+// `ir_entries`), then creates, pushes and initializes the root frame with
+// `curr_stacklen` unknown stack entries.
+// Returns NULL on failure, after releasing everything allocated so far.
+static _Py_UOpsAbstractInterpContext *
+abstractinterp_context_new(PyCodeObject *co,
+                           int curr_stacklen,
+                           int ir_entries)
+{
+    int locals_len = co->co_nlocalsplus;
+    int stack_len = co->co_stacksize;
+    _Py_UOpsAbstractFrame *frame = NULL;
+    _Py_UOpsAbstractInterpContext *self = NULL;
+    _Py_UOps_Opt_IR *ir = NULL;
+    char *arena = NULL;
+    Py_ssize_t arena_size = sizeof(_Py_UOpsSymbolicExpression) * ir_entries * OVERALLOCATE_FACTOR;
+    arena = (char *)PyMem_Malloc(arena_size);
+    if (arena == NULL) {
+        goto error;
+    }
+
+    ir = _Py_UOpsSSA_IR_New(ir_entries);
+    if (ir == NULL) {
+        goto error;
+    }
+    // The first IR entry is the root frame's push-info record.
+    _Py_UOpsOptIREntry *root_frame = ir_frame_push_info(ir);
+
+    self = PyObject_NewVar(_Py_UOpsAbstractInterpContext,
+                           &_Py_UOpsAbstractInterpContext_Type,
+                           MAX_ABSTRACT_INTERP_SIZE);
+    if (self == NULL) {
+        goto error;
+    }
+    // PyObject_NewVar() does not zero the object body, and the deallocator
+    // reads these two fields. They must be valid before any `goto error`
+    // below; ownership of `frame` and `ir` is only handed to `self` on the
+    // success path, so the error path can release each exactly once.
+    self->frame = NULL;
+    self->ir = NULL;
+
+    self->limit = self->localsplus + MAX_ABSTRACT_INTERP_SIZE;
+    self->water_level = self->localsplus;
+    for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
+        self->localsplus[i] = NULL;
+    }
+
+    self->curr_region_id = 0;
+    // Setup the arena for sym expressions.
+    self->s_arena.sym_curr_id = 0;
+    self->s_arena.arena = arena;
+    self->s_arena.curr_available = arena;
+    self->s_arena.end = arena + arena_size;
+
+    // Frame setup
+    self->new_frame_sym = NULL;
+    frame = frame_new(self, co->co_consts, stack_len, locals_len, curr_stacklen, root_frame);
+    if (frame == NULL) {
+        goto error;
+    }
+    if (frame_push(self, frame, self->water_level, locals_len, curr_stacklen,
+                   stack_len + locals_len) < 0) {
+        goto error;
+    }
+    if (frame_initalize(self, frame, locals_len, curr_stacklen) < 0) {
+        goto error;
+    }
+    self->frame = frame;
+    root_frame->my_virtual_localsplus = self->localsplus;
+
+    // IR and sym setup
+    self->ir = ir;
+    self->frequent_syms.nulL_sym = NULL;
+
+
+    return self;
+
+error:
+    PyMem_Free(arena);
+    if (self != NULL) {
+        self->s_arena.arena = NULL;
+    }
+    // self->frame and self->ir are still NULL here, so `ir` and `frame`
+    // are released by the two calls below and not by the deallocator.
+    Py_XDECREF(self);
+    Py_XDECREF(ir);
+    Py_XDECREF(frame);
+    return NULL;
+}
+
+static inline _Py_UOpsSymbolicExpression*
+sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx);
+
+// Build one symbolic constant per item of the `co_consts` tuple.
+// Returns an array allocated with PyMem_New(), or NULL on failure.
+// NOTE(review): the new reference passed to sym_init_const() for the failing
+// item is not released here -- confirm whether the callee consumes it on
+// failure.
+static inline _Py_UOpsSymbolicExpression **
+create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts)
+{
+    Py_ssize_t co_const_len = PyTuple_GET_SIZE(co_consts);
+    _Py_UOpsSymbolicExpression **sym_consts = PyMem_New(_Py_UOpsSymbolicExpression *, co_const_len);
+    if (sym_consts == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < co_const_len; i++) {
+        _Py_UOpsSymbolicExpression *res = sym_init_const(ctx, Py_NewRef(PyTuple_GET_ITEM(co_consts, i)), (int)i);
+        if (res == NULL) {
+            goto error;
+        }
+        sym_consts[i] = res;
+    }
+
+    return sym_consts;
+error:
+    // sym_consts is a raw PyMem buffer, not a PyObject: it has no refcount
+    // and must be released with PyMem_Free(), never Py_DECREF().
+    PyMem_Free(sym_consts);
+    return NULL;
+}
+
+static inline _Py_UOpsSymbolicExpression*
+sym_init_var(_Py_UOpsAbstractInterpContext *ctx, int locals_idx);
+
+static inline _Py_UOpsSymbolicExpression*
+sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx);
+
+static void
+sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym);
+
+/*
+ * The reason why we have a separate frame_push and frame_initialize is to mimic
+ * what CPython's frame push does. This also prepares for inlining.
+ *
+ * frame_push points the frame's locals/stack/stack_pointer into the context's
+ * localsplus area starting at localsplus_start and moves the context's water
+ * level to localsplus_start + total_len.
+ * Returns 0 on success, -1 if the new water level exceeds ctx->limit.
+ * */
+static inline int
+frame_push(_Py_UOpsAbstractInterpContext *ctx,
+           _Py_UOpsAbstractFrame *frame,
+           _Py_UOpsSymbolicExpression **localsplus_start,
+           int locals_len,
+           int curr_stacklen,
+           int total_len)
+{
+    frame->locals = localsplus_start;
+    frame->stack = frame->locals + locals_len;
+    frame->stack_pointer = frame->stack + curr_stacklen;
+    ctx->water_level = localsplus_start + total_len;
+    if (ctx->water_level > ctx->limit) {
+        return -1;
+    }
+    return 0;
+}
+
+// Fill the frame's first `locals_len` locals with fresh INIT_FAST symbols and
+// its first `curr_stacklen` stack slots with fresh unknown symbols.
+// Returns 0 on success, -1 if a symbol could not be allocated.
+static inline int
+frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame,
+                int locals_len, int curr_stacklen)
+{
+    // Initialize with the initial state of all local variables
+    for (int i = 0; i < locals_len; i++) {
+        _Py_UOpsSymbolicExpression *local = sym_init_var(ctx, i);
+        if (local == NULL) {
+            goto error;
+        }
+        frame->locals[i] = local;
+    }
+
+
+    // Initialize the stack as well
+    for (int i = 0; i < curr_stacklen; i++) {
+        _Py_UOpsSymbolicExpression *stackvar = sym_init_unknown(ctx);
+        if (stackvar == NULL) {
+            goto error;
+        }
+        frame->stack[i] = stackvar;
+    }
+
+    return 0;
+
+error: + return -1; +} + +static inline _Py_UOpsAbstractFrame * +frame_new(_Py_UOpsAbstractInterpContext *ctx, + PyObject *co_consts, int stack_len, int locals_len, + int curr_stacklen, _Py_UOpsOptIREntry *frame_ir_entry) +{ + _Py_UOpsSymbolicExpression **sym_consts = create_sym_consts(ctx, co_consts); + if (sym_consts == NULL) { + return NULL; + } + int total_len = stack_len + locals_len; + _Py_UOpsAbstractFrame *frame = PyObject_New(_Py_UOpsAbstractFrame, + &_Py_UOpsAbstractFrame_Type); + if (frame == NULL) { + Py_DECREF(sym_consts); + return NULL; + } + + + frame->sym_consts = sym_consts; + frame->sym_consts_len = (int)Py_SIZE(co_consts); + frame->stack_len = stack_len; + frame->locals_len = locals_len; + frame->prev = NULL; + frame->next = NULL; + + frame->frame_ir_entry = frame_ir_entry; + return frame; +} + +static inline bool +sym_is_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ); +static inline uint64_t +sym_type_get_refinement(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ); + +static inline PyFunctionObject * +extract_func_from_sym(_Py_UOpsSymbolicExpression *frame_sym) +{ + switch(frame_sym->inst.opcode) { + case _INIT_CALL_PY_EXACT_ARGS: { + _Py_UOpsSymbolicExpression *callable_sym = frame_sym->operands[0]; + if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { + return NULL; + } + uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); + PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); + if (func == NULL) { + return NULL; + } + return func; + } + default: + Py_UNREACHABLE(); + } +} + +static inline _Py_UOpsSymbolicExpression* +extract_self_or_null_from_sym(_Py_UOpsSymbolicExpression *frame_sym) +{ + switch(frame_sym->inst.opcode) { + case _INIT_CALL_PY_EXACT_ARGS: + return frame_sym->operands[1]; + default: + Py_UNREACHABLE(); + } +} + +static inline _Py_UOpsSymbolicExpression** +extract_args_from_sym(_Py_UOpsSymbolicExpression *frame_sym) +{ 
+    switch(frame_sym->inst.opcode) {
+        case _INIT_CALL_PY_EXACT_ARGS:
+            return &frame_sym->operands[2];
+        default:
+            Py_UNREACHABLE();
+    }
+}
+
+// Create a frame for code object `co`, place it at `localsplus_start` and
+// make it the context's current frame. The previous current frame becomes
+// the new frame's `prev`.
+// 0 on success, anything else is error.
+static int
+ctx_frame_push(
+    _Py_UOpsAbstractInterpContext *ctx,
+    _Py_UOpsOptIREntry *frame_ir_entry,
+    PyCodeObject *co,
+    _Py_UOpsSymbolicExpression **localsplus_start
+)
+{
+    assert(frame_ir_entry != NULL);
+    _Py_UOpsAbstractFrame *frame = frame_new(ctx,
+                                             co->co_consts, co->co_stacksize,
+                                             co->co_nlocalsplus,
+                                             0, frame_ir_entry);
+    if (frame == NULL) {
+        return -1;
+    }
+    if (frame_push(ctx, frame, localsplus_start, co->co_nlocalsplus, 0,
+                   co->co_nlocalsplus + co->co_stacksize) < 0) {
+        goto error;
+    }
+    if (frame_initalize(ctx, frame, co->co_nlocalsplus, 0) < 0) {
+        goto error;
+    }
+
+    // The reference ctx held on the old frame moves to frame->prev.
+    frame->prev = ctx->frame;
+    ctx->frame->next = frame;
+    ctx->frame = frame;
+
+    frame_ir_entry->my_virtual_localsplus = localsplus_start;
+
+    return 0;
+
+error:
+    // The frame has not been linked into ctx yet, so this function still
+    // holds the only reference to it; release it instead of leaking it.
+    Py_DECREF(frame);
+    return -1;
+}
+
+// Drop the current frame and make its `prev` the current frame again.
+// The water level is reset to the start of the popped frame's locals.
+// Always returns 0.
+static int
+ctx_frame_pop(
+    _Py_UOpsAbstractInterpContext *ctx
+)
+{
+    _Py_UOpsAbstractFrame *frame = ctx->frame;
+    ctx->frame = frame->prev;
+    assert(ctx->frame != NULL);
+    // Detach first so that deallocating `frame` does not release the frame
+    // that ctx now owns.
+    frame->prev = NULL;
+
+    ctx->water_level = frame->locals;
+    Py_DECREF(frame);
+    ctx->frame->next = NULL;
+    return 0;
+}
+
+// Steals a reference to const_val
+// Creates a symbolic expression consisting of subexpressions
+// from arr_start and va_list.
+// The operands are stored in this order: the variadic arguments first,
+// then the arr_start elements.
+// Returns NULL if the arena has no room left for the expression.
+// NOTE(review): on that failure path const_val is not released here --
+// confirm whether callers expect the reference to be consumed.
+static _Py_UOpsSymbolicExpression*
+_Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx,
+                               _PyUOpInstruction inst,
+                               PyObject *const_val,
+                               int num_arr,
+                               _Py_UOpsSymbolicExpression **arr_start,
+                               int num_subexprs, ...)
+{
+    int total_subexprs = num_arr + num_subexprs;
+
+    // Check for room BEFORE moving the cursor, so the cursor never points
+    // past the end of the arena and a failed request leaves it untouched.
+    size_t size = sizeof(_Py_UOpsSymbolicExpression)
+                  + sizeof(_Py_UOpsSymbolicExpression *) * (size_t)total_subexprs;
+    if (size > (size_t)(ctx->s_arena.end - ctx->s_arena.curr_available)) {
+        return NULL;
+    }
+    _Py_UOpsSymbolicExpression *self = (_Py_UOpsSymbolicExpression *)ctx->s_arena.curr_available;
+    ctx->s_arena.curr_available += size;
+
+    self->idx = -1;
+    // INVALID_TYPE is bit 31: shift an unsigned 1, since shifting a signed
+    // int into the sign bit is undefined behaviour.
+    self->sym_type.types = (uint32_t)1 << INVALID_TYPE;
+    self->inst = inst;
+    self->const_val = const_val;
+    self->originating_region = ctx->curr_region_id;
+
+
+    // Setup
+    int i = 0;
+    _Py_UOpsSymbolicExpression **operands = self->operands;
+    va_list curr;
+
+    va_start(curr, num_subexprs);
+
+    for (; i < num_subexprs; i++) {
+        // Note: no incref here. symexprs are kept alive by the global expression
+        // table.
+        // We intentionally don't want to hold a reference to it so we don't
+        // need GC.
+        operands[i] = va_arg(curr, _Py_UOpsSymbolicExpression *);
+        assert(operands[i]);
+    }
+
+    va_end(curr);
+
+    for (int x = 0; x < num_arr; x++) {
+        operands[i+x] = arr_start[x];
+        assert(operands[i+x]);
+    }
+
+    self->operand_count = total_subexprs;
+
+    return self;
+}
+
+
+// Add `typ` to the symbol's type bitmask. For types that carry refinement
+// data (typ <= MAX_TYPE_WITH_REFINEMENT) also record `refinement`.
+static void
+sym_set_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement)
+{
+    // Unsigned shift: typ may be INVALID_TYPE (31).
+    sym->sym_type.types |= (uint32_t)1 << typ;
+    if (typ <= MAX_TYPE_WITH_REFINEMENT) {
+        sym->sym_type.refinement[typ] = refinement;
+    }
+}
+// Copy all type information and the constant value (new reference) from
+// from_sym to to_sym.
+static void
+sym_copy_type(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym)
+{
+    to_sym->sym_type = from_sym->sym_type;
+    Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val));
+}
+
+// Like sym_copy_type(), but keeps only the NULL/int/float/str type bits;
+// every other type bit is cleared in to_sym. The constant value is copied.
+static void
+sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym)
+{
+    uint32_t immutables = (1 << NULL_TYPE | 1 << PYLONG_TYPE | 1 << PYFLOAT_TYPE | 1 << PYUNICODE_TYPE);
+    to_sym->sym_type.types = (from_sym->sym_type.types & immutables);
+    Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val));
+}
+
+static void
+sym_set_type_from_const(_Py_UOpsSymbolicExpression *sym, PyObject *obj) +{ + PyTypeObject *tp = Py_TYPE(obj); + + if (tp == &PyLong_Type) { + sym_set_type(sym, PYLONG_TYPE, 0); + } + else if (tp == &PyFloat_Type) { + sym_set_type(sym, PYFLOAT_TYPE, 0); + } + else if (tp == &PyUnicode_Type) { + sym_set_type(sym, PYUNICODE_TYPE, 0); + } + + if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) { + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(obj); + + if (_PyDictOrValues_IsValues(*dorv) || + _PyObject_MakeInstanceAttributesFromDict(obj, dorv)) { + sym_set_type(sym, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, 0); + + PyTypeObject *owner_cls = tp; + PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; + sym_set_type( + sym, + GUARD_KEYS_VERSION_TYPE, + owner_heap_type->ht_cached_keys->dk_version + ); + } + + if (!_PyDictOrValues_IsValues(*dorv)) { + sym_set_type(sym, GUARD_DORV_VALUES_TYPE, 0); + } + } + + sym_set_type(sym, GUARD_TYPE_VERSION_TYPE, tp->tp_version_tag); +} + + +static inline _Py_UOpsSymbolicExpression* +sym_init_var(_Py_UOpsAbstractInterpContext *ctx, int locals_idx) +{ + _PyUOpInstruction inst = {INIT_FAST, locals_idx, 0, 0}; + return _Py_UOpsSymbolicExpression_New(ctx, + inst, + NULL, + 0, + NULL, + 0); +} + +static inline _Py_UOpsSymbolicExpression* +sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx) +{ + _PyUOpInstruction inst = {CACHE, 0, 0, 0}; + return _Py_UOpsSymbolicExpression_New(ctx, + inst, + NULL, + 0, + NULL, + 0); +} + +// Steals a reference to const_val +static inline _Py_UOpsSymbolicExpression* +sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx) +{ + _PyUOpInstruction inst = {LOAD_CONST, const_idx, 0, 0}; + _Py_UOpsSymbolicExpression *temp = _Py_UOpsSymbolicExpression_New( + ctx, + inst, + const_val, + 0, + NULL, + 0 + ); + if (temp == NULL) { + return NULL; + } + sym_set_type_from_const(temp, const_val); + return temp; +} + +static _Py_UOpsSymbolicExpression* 
+sym_init_null(_Py_UOpsAbstractInterpContext *ctx) +{ + if (ctx->frequent_syms.nulL_sym != NULL) { + return ctx->frequent_syms.nulL_sym; + } + _Py_UOpsSymbolicExpression *null_sym = sym_init_unknown(ctx); + if (null_sym == NULL) { + return NULL; + } + null_sym->inst.opcode = PUSH_NULL; + sym_set_type(null_sym, NULL_TYPE, 0); + ctx->frequent_syms.nulL_sym = null_sym; + + return null_sym; +} + +static inline bool +sym_is_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ) +{ + if ((sym->sym_type.types & (1 << typ)) == 0) { + return false; + } + return true; +} + +static inline bool +sym_matches_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) +{ + if (!sym_is_type(sym, typ)) { + return false; + } + if (typ <= MAX_TYPE_WITH_REFINEMENT) { + return sym->sym_type.refinement[typ] == refinement; + } + return true; +} + +static uint64_t +sym_type_get_refinement(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ) +{ + assert(sym_is_type(sym, typ)); + assert(typ <= MAX_TYPE_WITH_REFINEMENT); + return sym->sym_type.refinement[typ]; +} + + +static inline bool +op_is_end(uint32_t opcode) +{ + return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP; +} + +static inline bool +op_is_passthrough(uint32_t opcode) +{ +return _PyUop_Flags[opcode] & HAS_PASSTHROUGH_FLAG; +} + +static inline bool +op_is_pure(uint32_t opcode) +{ + return _PyUop_Flags[opcode] & HAS_PURE_FLAG; +} + +static inline bool +op_is_bookkeeping(uint32_t opcode) { + return (opcode == _SET_IP || + opcode == _CHECK_VALIDITY); +} + +static inline bool +op_is_specially_handled(uint32_t opcode) +{ + return _PyUop_Flags[opcode] & HAS_SPECIAL_OPT_FLAG; +} + +static inline bool +is_const(_Py_UOpsSymbolicExpression *expr) +{ + return expr->const_val != NULL; +} + +static inline PyObject * +get_const(_Py_UOpsSymbolicExpression *expr) +{ + return Py_NewRef(expr->const_val); +} + + + +static int +write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, 
_PyUOpInstruction *curr, bool copy_types) { +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + DPRINTF(3, "write_stack_to_ir\n"); +#endif + // Emit the state of the stack first. + int stack_entries = ctx->frame->stack_pointer - ctx->frame->stack; + for (int i = 0; i < stack_entries; i++) { + ir_store(ctx->ir, ctx->frame->stack[i], TARGET_NONE); + _Py_UOpsSymbolicExpression *new_stack = sym_init_unknown(ctx); + if (new_stack == NULL) { + goto error; + } + if (copy_types) { + sym_copy_type(ctx->frame->stack[i], new_stack); + } else { + sym_copy_immutable_type_info(ctx->frame->stack[i], new_stack); + } + ctx->frame->stack[i] = new_stack; + } + // Write bookkeeping ops, but don't write duplicates. + if((curr-1)->opcode == _CHECK_VALIDITY && (curr-2)->opcode == _SET_IP) { + ir_plain_inst(ctx->ir, *(curr-2)); + ir_plain_inst(ctx->ir, *(curr-1)); + } + return 0; + +error: + return -1; +} + +static void +clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { + int locals_entries = ctx->frame->locals_len; + for (int i = 0; i < locals_entries; i++) { + // clears out all types except immutables. 
+ sym_copy_immutable_type_info(ctx->frame->locals[i], ctx->frame->locals[i]); + } +} + +typedef enum { + ABSTRACT_INTERP_ERROR, + ABSTRACT_INTERP_NORMAL, + ABSTRACT_INTERP_GUARD_REQUIRED, +} AbstractInterpExitCodes; + + +#define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dval, result) \ +do { \ + if (Py_REFCNT(left) == 1) { \ + ((PyFloatObject *)left)->ob_fval = (dval); \ + _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);\ + result = (left); \ + } \ + else if (Py_REFCNT(right) == 1) {\ + ((PyFloatObject *)right)->ob_fval = (dval); \ + _Py_DECREF_NO_DEALLOC(left); \ + result = (right); \ + }\ + else { \ + result = PyFloat_FromDouble(dval); \ + if ((result) == NULL) goto error; \ + _Py_DECREF_NO_DEALLOC(left); \ + _Py_DECREF_NO_DEALLOC(right); \ + } \ +} while (0) + +#define DEOPT_IF(COND, INSTNAME) \ + if ((COND)) { \ + goto guard_required; \ + } + +#ifndef Py_DEBUG +#define GETITEM(ctx, i) (ctx->frame->sym_consts[(i)]) +#else +static inline _Py_UOpsSymbolicExpression * +GETITEM(_Py_UOpsAbstractInterpContext *ctx, Py_ssize_t i) { + assert(i < ctx->frame->sym_consts_len); + return ctx->frame->sym_consts[i]; +} +#endif + +static int +uop_abstract_interpret_single_inst( + PyCodeObject *co, + _PyUOpInstruction *inst, + _PyUOpInstruction *end, + _Py_UOpsAbstractInterpContext *ctx +) +{ +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + +#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) +#define STACK_SIZE() (co->co_stacksize) +#define BASIC_STACKADJ(n) (stack_pointer += n) + +#ifdef Py_DEBUG + #define STACK_GROW(n) do { \ + assert(n >= 0); \ + BASIC_STACKADJ(n); \ + if (STACK_LEVEL() > STACK_SIZE()) { \ + DPRINTF(2, "err: %d, %d\n", STACK_SIZE(), STACK_LEVEL())\ + } \ + assert(STACK_LEVEL() <= STACK_SIZE()); \ + } while (0) + #define STACK_SHRINK(n) do { \ + assert(n >= 0); \ + assert(STACK_LEVEL() 
>= n); \ + BASIC_STACKADJ(-(n)); \ + } while (0) +#else + #define STACK_GROW(n) BASIC_STACKADJ(n) + #define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) +#endif +#define PEEK(idx) (((stack_pointer)[-(idx)])) +#define GETLOCAL(idx) ((ctx->frame->locals[idx])) + +#define CURRENT_OPARG() (oparg) + +#define CURRENT_OPERAND() (operand) + +#define STAT_INC(opname, name) ((void)0) +#define TIER_TWO_ONLY ((void)0) + + int oparg = inst->oparg; + uint32_t opcode = inst->opcode; + uint64_t operand = inst->operand; + + _Py_UOpsSymbolicExpression **stack_pointer = ctx->frame->stack_pointer; + + + DPRINTF(2, "Abstract interpreting %s:%d ", + (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], + oparg); + switch (opcode) { +#include "abstract_interp_cases.c.h" + // Note: LOAD_FAST_CHECK is not pure!!! + case LOAD_FAST_CHECK: + STACK_GROW(1); + PEEK(1) = GETLOCAL(oparg); + // Value might be uninitialized, and might error. + if(PEEK(1)->inst.opcode == INIT_FAST) { + // In that case, to be safe, treat it as an impure region + ctx->curr_region_id++; + ctx->frame->stack_pointer = stack_pointer; + write_stack_to_ir(ctx, inst, true); + } + break; + case LOAD_FAST: + STACK_GROW(1); + // Guaranteed by the CPython bytecode compiler to not be uninitialized. 
+ PEEK(1) = GETLOCAL(oparg); + assert(PEEK(1)); + + break; + case LOAD_FAST_AND_CLEAR: { + STACK_GROW(1); + PEEK(1) = GETLOCAL(oparg); + _Py_UOpsSymbolicExpression *null_sym = sym_init_null(ctx); + if (null_sym == NULL) { + goto error; + } + GETLOCAL(oparg) = null_sym; + break; + } + case LOAD_CONST: { + STACK_GROW(1); + PEEK(1) = (_Py_UOpsSymbolicExpression *)GETITEM( + ctx, oparg); + break; + } + case STORE_FAST_MAYBE_NULL: + case STORE_FAST: { + _Py_UOpsSymbolicExpression *value = PEEK(1); + ir_store(ctx->ir, value, oparg); + _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, oparg); + if (new_local == NULL) { + goto error; + } + sym_copy_type(value, new_local); + GETLOCAL(oparg) = new_local; + STACK_SHRINK(1); + break; + } + case COPY: { + _Py_UOpsSymbolicExpression *bottom = PEEK(1 + (oparg - 1)); + STACK_GROW(1); + _Py_UOpsSymbolicExpression *temp = sym_init_unknown(ctx); + if (temp == NULL) { + goto error; + } + PEEK(1) = temp; + sym_copy_type(bottom, temp); + break; + } + + case POP_TOP: { + ir_store(ctx->ir, PEEK(1), -1); + STACK_SHRINK(1); + break; + } + + case PUSH_NULL: { + STACK_GROW(1); + _Py_UOpsSymbolicExpression *null_sym = sym_init_null(ctx); + if (null_sym == NULL) { + goto error; + } + PEEK(1) = null_sym; + break; + } + + case _PUSH_FRAME: { + int argcount = oparg; + _Py_UOpsAbstractFrame *old_frame = ctx->frame; + // TOS is the new frame. 
+ write_stack_to_ir(ctx, inst, true); + STACK_SHRINK(1); + ctx->frame->stack_pointer = stack_pointer; + _Py_UOpsOptIREntry *frame_ir_entry = ir_frame_push_info(ctx->ir); + + PyFunctionObject *func = extract_func_from_sym(ctx->new_frame_sym); + if (func == NULL) { + goto error; + } + PyCodeObject *co = (PyCodeObject *)func->func_code; + + _Py_UOpsSymbolicExpression *self_or_null = extract_self_or_null_from_sym(ctx->new_frame_sym); + assert(self_or_null != NULL); + assert(ctx->new_frame_sym != NULL); + _Py_UOpsSymbolicExpression **args = extract_args_from_sym(ctx->new_frame_sym); + assert(args != NULL); + ctx->new_frame_sym = NULL; + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS + if (!sym_is_type(self_or_null, NULL_TYPE)) { + args--; + argcount++; + } + ir_plain_inst(ctx->ir, *inst); + if (ctx_frame_push( + ctx, + frame_ir_entry, + co, + ctx->water_level + ) != 0){ + goto error; + } + stack_pointer = ctx->frame->stack_pointer; + for (int i = 0; i < argcount; i++) { + sym_copy_type(args[i], ctx->frame->locals[i]); + } + break; + } + + case _POP_FRAME: { + assert(STACK_LEVEL() == 1); + write_stack_to_ir(ctx, inst, true); + _Py_UOpsOptIREntry *frame_ir_entry = ctx->frame->frame_ir_entry; + ir_frame_pop_info(ctx->ir); + ir_plain_inst(ctx->ir, *inst); + _Py_UOpsSymbolicExpression *retval = PEEK(1); + STACK_SHRINK(1); + ctx->frame->stack_pointer = stack_pointer; + + if (ctx_frame_pop(ctx) != 0){ + goto error; + } + stack_pointer = ctx->frame->stack_pointer; + // Push retval into new frame. 
+ STACK_GROW(1); + _Py_UOpsSymbolicExpression *new_retval = sym_init_unknown(ctx); + if (new_retval == NULL) { + goto error; + } + PEEK(1) = new_retval; + sym_copy_type(retval, new_retval); + break; + } + + case SWAP: { + write_stack_to_ir(ctx, inst, true); + ir_plain_inst(ctx->ir, *inst); + + _Py_UOpsSymbolicExpression *top; + _Py_UOpsSymbolicExpression *bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-2 - (oparg-2)]; + assert(oparg >= 2); + + _Py_UOpsSymbolicExpression *new_top = sym_init_unknown(ctx); + if (new_top == NULL) { + goto error; + } + sym_copy_type(top, new_top); + + _Py_UOpsSymbolicExpression *new_bottom = sym_init_unknown(ctx); + if (new_bottom == NULL) { + goto error; + } + sym_copy_type(bottom, new_bottom); + + stack_pointer[-2 - (oparg-2)] = new_top; + stack_pointer[-1] = new_bottom; + break; + } + default: + DPRINTF(1, "Unknown opcode in abstract interpreter\n"); + Py_UNREACHABLE(); + } + + // Store the frame symbolic to extract information later + if (opcode == _INIT_CALL_PY_EXACT_ARGS) { + ctx->new_frame_sym = PEEK(1); + DPRINTF(3, "call_py_exact_args: {"); + for (Py_ssize_t i = 0; i < (ctx->new_frame_sym->operand_count); i++) { + DPRINTF(3, "#%ld (%s)", i, ((ctx->new_frame_sym->operands[i]->inst.opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[ctx->new_frame_sym->operands[i]->inst.opcode])) + } + DPRINTF(3, "} \n"); + } + DPRINTF(2, " stack_level %d\n", STACK_LEVEL()); + ctx->frame->stack_pointer = stack_pointer; + assert(STACK_LEVEL() >= 0); + + return ABSTRACT_INTERP_NORMAL; + +pop_2_error_tier_two: + STACK_SHRINK(1); + STACK_SHRINK(1); +error: + DPRINTF(1, "Encountered error in abstract interpreter\n"); + return ABSTRACT_INTERP_ERROR; + +guard_required: + DPRINTF(2, " stack_level %d\n", STACK_LEVEL()); + ctx->frame->stack_pointer = stack_pointer; + assert(STACK_LEVEL() >= 0); + + return ABSTRACT_INTERP_GUARD_REQUIRED; + +} + +static _Py_UOpsAbstractInterpContext * +uop_abstract_interpret( + PyCodeObject *co, + _PyUOpInstruction *trace, + int trace_len, + int curr_stacklen +) +{ + +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + // Initialize the symbolic consts + + _Py_UOpsAbstractInterpContext *ctx = NULL; + + ctx = abstractinterp_context_new( + co, curr_stacklen, + trace_len); + if (ctx == NULL) { + goto error; + } + + _PyUOpInstruction *curr = trace; + _PyUOpInstruction *end = trace + trace_len; + AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; + + bool first_impure = true; + while (curr < end && !op_is_end(curr->opcode)) { + + if (!op_is_pure(curr->opcode) && + !op_is_specially_handled(curr->opcode) && + !op_is_bookkeeping(curr->opcode) && + !op_is_passthrough(curr->opcode)) { + DPRINTF(2, "Impure %s\n", (curr->opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[curr->opcode]); + if (first_impure) { + write_stack_to_ir(ctx, curr, false); + clear_locals_type_info(ctx); + } + first_impure = false; + ctx->curr_region_id++; + ir_plain_inst(ctx->ir, *curr); + } + else { + first_impure = true; + } + + status = uop_abstract_interpret_single_inst( + co, curr, end, ctx + ); + if (status == ABSTRACT_INTERP_ERROR) { + goto error; + } + else if (status == ABSTRACT_INTERP_GUARD_REQUIRED) { + DPRINTF(2, "GUARD\n"); + // Emit the state of the stack first. + // Since this is a guard, copy over the type info + write_stack_to_ir(ctx, curr, true); + ir_plain_inst(ctx->ir, *curr); + } + + curr++; + + } + + ctx->terminating = curr; + write_stack_to_ir(ctx, curr, false); + + return ctx; + +error: + return NULL; +} + +typedef struct _Py_UOpsEmitter { + _PyUOpInstruction *writebuffer; + _PyUOpInstruction *writebuffer_end; + _PyUOpInstruction *writebuffer_true_end; + int curr_i; + int curr_reserve_i; + + int consumed_localsplus_slots; + _Py_UOpsOptIREntry *curr_frame_ir_entry; +} _Py_UOpsEmitter; + +static inline int +emit_i(_Py_UOpsEmitter *emitter, + _PyUOpInstruction inst) +{ +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + if (emitter->curr_i < 0) { + DPRINTF(2, "out of emission space\n"); + return -1; + } + if (emitter->writebuffer + emitter->curr_i >= emitter->writebuffer_end) { + DPRINTF(2, "out of emission space\n"); + return -1; + } + DPRINTF(3, "Emitting instruction at [%d] op: %s, oparg: %d, operand: %" PRIu64 " \n", + emitter->curr_i, + (inst.opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[inst.opcode], + inst.oparg, + inst.operand); + emitter->writebuffer[emitter->curr_i] = inst; + emitter->curr_i++; + return 0; +} + + + +static int +count_stack_operands(_Py_UOpsSymbolicExpression *sym) +{ + int total = 0; + for (Py_ssize_t i = 0; i < sym->operand_count; i++) { + if (_PyOpcode_isstackvalue(sym->operands[i]->inst.opcode)) { + total++; + } + } + return total; +} + +static int +compile_sym_to_uops(_Py_UOpsEmitter *emitter, + _Py_UOpsSymbolicExpression *sym, + _Py_UOpsAbstractInterpContext *ctx, + bool do_cse) +{ + _PyUOpInstruction inst; + // Since CPython is a stack machine, just compile in the order + // seen in the operands, then the instruction itself. + + // Constant propagated value, load immediate constant + if (sym->const_val != NULL && !_PyOpcode_isstackvalue(sym->inst.opcode)) { + // Shrink the stack if operands consist of stack values. + // We don't need them anymore. This could happen because + // the operands first need to be guarded and the guard could not + // be eliminated via constant propagation. + int stack_operands = count_stack_operands(sym); + if (stack_operands) { + inst.opcode = _SHRINK_STACK; + inst.oparg = (int)sym->operand_count; + inst.operand = 0; + if (emit_i(emitter, inst) < 0) { + return -1; + } + } + + inst.opcode = _LOAD_CONST_INLINE; + inst.oparg = 0; + // TODO memory leak. + inst.operand = (uint64_t)Py_NewRef(sym->const_val); + return emit_i(emitter, inst); + } + + if (_PyOpcode_isterminal(sym->inst.opcode)) { + // These are for unknown stack entries. + if (_PyOpcode_isstackvalue(sym->inst.opcode)) { + // Leave it be. 
These are initial values from the start + return 0; + } + inst = sym->inst; + if (sym->inst.opcode == INIT_FAST) { + inst.opcode = LOAD_FAST; + } + return emit_i(emitter, inst); + } + + // Compile each operand + Py_ssize_t operands_count = sym->operand_count; + for (Py_ssize_t i = 0; i < operands_count; i++) { + if (sym->operands[i] == NULL) { + continue; + } + // TODO Py_EnterRecursiveCall ? + if (compile_sym_to_uops( + emitter, + sym->operands[i], + ctx, true) < 0) { + return -1; + } + } + + + // Finally, emit the operation itself. + return emit_i(emitter, sym->inst); +} + +static int +emit_uops_from_ctx( + _Py_UOpsAbstractInterpContext *ctx, + _PyUOpInstruction *trace_writebuffer, + _PyUOpInstruction *writebuffer_end, + int *nop_to +) +{ + +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + + + _Py_UOpsAbstractFrame *root_frame = ctx->frame; + while (root_frame->prev != NULL) { + root_frame = root_frame->prev; + } + _Py_UOpsEmitter emitter = { + trace_writebuffer, + writebuffer_end, + writebuffer_end, + 0, + (int)(writebuffer_end - trace_writebuffer), + 0, + root_frame->frame_ir_entry + }; + + _Py_UOps_Opt_IR *ir = ctx->ir; + int entries = ir->curr_write; + // First entry reserved for the root frame info. + for (int i = 1; i < entries; i++) { + _Py_UOpsOptIREntry *curr = &ir->entries[i]; + switch (curr->typ) { + case IR_SYMBOLIC: { + DPRINTF(3, "symbolic: expr: %s oparg: %d, operand: %p\n", + (curr->expr->inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[curr->expr->inst.opcode], + curr->expr->inst.oparg, + (void *)curr->expr->inst.operand); + if (compile_sym_to_uops(&emitter, curr->expr, ctx, true) < 0) { + goto error; + } + // Anything less means no assignment target at all. + if (curr->assignment_target >= TARGET_UNUSED) { + _PyUOpInstruction inst = { + curr->assignment_target == TARGET_UNUSED + ? 
POP_TOP : STORE_FAST, + curr->assignment_target, 0, 0}; + if (emit_i(&emitter, inst) < 0) { + goto error; + } + } + break; + } + case IR_PLAIN_INST: { + if (emit_i(&emitter, curr->inst) < 0) { + goto error; + } + break; + } + case IR_FRAME_PUSH_INFO: { + _Py_UOpsOptIREntry *prev = emitter.curr_frame_ir_entry; + emitter.curr_frame_ir_entry = curr; + curr->prev_frame_ir = prev; + break; + } + case IR_FRAME_POP_INFO: { + _Py_UOpsOptIREntry *prev = emitter.curr_frame_ir_entry->prev_frame_ir; + // There will always be the root frame. + assert(prev != NULL); + emitter.curr_frame_ir_entry->prev_frame_ir = NULL; + emitter.curr_frame_ir_entry = prev; + break; + } + case IR_NOP: break; + } + } + + if (emit_i(&emitter, *ctx->terminating) < 0) { + return -1; + } + *nop_to = (int)(emitter.writebuffer_end - emitter.writebuffer); + return emitter.curr_i; + +error: + return -1; +} static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { @@ -59,6 +1593,48 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { + _PyUOpInstruction *temp_writebuffer = NULL; + + temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size * OVERALLOCATE_FACTOR); + if (temp_writebuffer == NULL) { + goto error; + } + + + // Pass: Abstract interpretation and symbolic analysis + _Py_UOpsAbstractInterpContext *ctx = uop_abstract_interpret( + co, buffer, + buffer_size, curr_stacklen); + + if (ctx == NULL) { + goto error; + } + + _PyUOpInstruction *writebuffer_end = temp_writebuffer + buffer_size; + // Compile the SSA IR + int nop_to = 0; + int trace_len = emit_uops_from_ctx( + ctx, + temp_writebuffer, + writebuffer_end, + &nop_to + ); + if (trace_len < 0 || trace_len > buffer_size) { + goto error; + } + + // Pass: fix up side exit stubs. This MUST be called as the last pass! + // trace_len = copy_over_exit_stubs(buffer, original_trace_len, temp_writebuffer, trace_len); + + // Fill in our new trace! 
+ memcpy(buffer, temp_writebuffer, buffer_size * sizeof(_PyUOpInstruction)); + + PyMem_Free(temp_writebuffer); + remove_unneeded_uops(buffer, buffer_size); + return 0; -} +error: + PyMem_Free(temp_writebuffer); + return -1; +} \ No newline at end of file diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 7ed3b57136554f..976c72bfdca684 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -25,6 +25,7 @@ class Properties: pure: bool passthrough: bool guard: bool + specially_handled_in_optimizer: bool def dump(self, indent: str) -> None: print(indent, end="") @@ -52,6 +53,7 @@ def from_list(properties: list["Properties"]) -> "Properties": pure=all(p.pure for p in properties), passthrough=all(p.passthrough for p in properties), guard=all(p.guard for p in properties), + specially_handled_in_optimizer=False, ) @@ -74,6 +76,7 @@ def from_list(properties: list["Properties"]) -> "Properties": pure=False, passthrough=False, guard=False, + specially_handled_in_optimizer=False, ) @@ -446,6 +449,9 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: def compute_properties(op: parser.InstDef) -> Properties: + # Importing here to avoid a circular import. 
+ from tier2_abstract_generator import SPECIALLY_HANDLED_ABSTRACT_INSTR + has_free = ( variable_used(op, "PyCell_New") or variable_used(op, "PyCell_GET") @@ -473,7 +479,8 @@ def compute_properties(op: parser.InstDef) -> Properties: has_free=has_free, pure="pure" in op.annotations, passthrough=passthrough, - guard=passthrough and deopts, + guard=passthrough and deopts and infallible, + specially_handled_in_optimizer=op.name in SPECIALLY_HANDLED_ABSTRACT_INSTR, ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index c6c602c7122b41..139d0d507e301d 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -215,6 +215,8 @@ def cflags(p: Properties) -> str: flags.append("HAS_PURE_FLAG") if p.passthrough: flags.append("HAS_PASSTHROUGH_FLAG") + if p.specially_handled_in_optimizer: + flags.append("HAS_SPECIAL_OPT_FLAG") if flags: return " | ".join(flags) else: diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 1826a0b645c3b8..30958c845db6f1 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -52,6 +52,7 @@ "ESCAPES", "PURE", "PASSTHROUGH", + "SPECIAL_OPT", ] diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index ee41d6297ab145..985a37b5285968 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -45,11 +45,9 @@ "POP_TOP", "PUSH_NULL", "SWAP", - "END_SEND", # Frame stuff "_PUSH_FRAME", "_POP_FRAME", - "_SHRINK_STACK", } NO_CONST_OR_TYPE_EVALUATE = { @@ -60,6 +58,7 @@ "_CHECK_PEP_523", "_CHECK_STACK_SPACE", "_INIT_CALL_PY_EXACT_ARGS", + "_END_SEND", } @@ -292,6 +291,7 @@ def _write_body_abstract_interp_guard_uop( # Does the input specify typed inputs? 
if not any(output_var.type_prop for output_var in mangled_uop.stack.outputs): + out.emit("goto guard_required;\n") return # If the input types already match, eliminate the guard # Read the cache information to check the auxiliary type information From f0e5dec35682a820825bd5c816132f65642b5d69 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 15 Jan 2024 01:07:02 +0800 Subject: [PATCH 006/111] fix a whole bunch of bugs --- Lib/test/test_capi/test_opt.py | 387 +++++++++++------- Python/abstract_interp_cases.c.h | 26 +- Python/bytecodes.c | 4 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- Python/optimizer.c | 6 +- Python/optimizer_analysis.c | 71 +++- .../tier2_abstract_generator.py | 11 +- 8 files changed, 319 insertions(+), 190 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 685cca8d86e38c..b7dfcad7f20405 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -542,165 +542,240 @@ def testfunc(n): class TestUopsOptimization(unittest.TestCase): - def test_int_constant_propagation(self): - def testfunc(loops): - num = 0 - for _ in range(loops): - x = 0 - y = 1 - a = x + y - return 1 - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 1) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 0) - - def test_int_type_propagation(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num - a = x + 1 - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 127) - binop_count = [opname for opname, _, _ in ex if opname == 
"_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - - def test_int_impure_region(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num - y = 1 - x // 2 - a = x + y - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - - def test_int_impure_region_attr(self): - class A: - foo = 1 - def testfunc(loops): - num = 0 - while num < loops: - x = A.foo + A.foo - y = 1 - A.foo - a = x + y - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - def test_int_large_pure_region(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num + num - num + num - num + num + num + num - num + num - num - y = 1 - a = x + num + num + num - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 11) - - def test_call_py_exact_args(self): - def testfunc(n): - def dummy(x): - return x+1 - for i in range(n): - dummy(i) - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(20) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_PUSH_FRAME", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) - - def test_frame_instance_method(self): - class A: - 
def __init__(self): - self.a = 1 - def foo(self): - return self.a - - a = A() - def testfunc(n): - for i in range(n): - a.foo() - + # def test_int_constant_propagation(self): + # def testfunc(loops): + # num = 0 + # for _ in range(loops): + # x = 0 + # y = 1 + # a = x + y + # return 1 + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # self.assertEqual(res, 1) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 0) + # + # def test_int_type_propagation(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + # a = x + 1 + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # self.assertEqual(res, 127) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # + # def test_int_impure_region(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + # y = 1 + # x // 2 + # a = x + y + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # + # def test_int_impure_region_attr(self): + # class A: + # foo = 1 + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = A.foo + A.foo + # y = 1 + # A.foo + # a = x + y + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # 
+ # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # def test_int_large_pure_region(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + num - num + num - num + num + num + num - num + num - num + # y = 1 + # a = x + num + num + num + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 11) + # + # def test_call_py_exact_args(self): + # def testfunc(n): + # def dummy(x): + # return x+1 + # for i in range(n): + # dummy(i) + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_PUSH_FRAME", uops) + # self.assertIn("_BINARY_OP_ADD_INT", uops) + # + # def test_frame_instance_method(self): + # class A: + # def __init__(self): + # self.a = 1 + # def foo(self): + # return self.a + # + # a = A() + # def testfunc(n): + # for i in range(n): + # a.foo() + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(32) + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + # + # def test_frame_class_method(self): + # class A: + # def __init__(self): + # self.a = 1 + # def foo(self): + # return self.a + # + # def testfunc(n): + # a = A() + # for i in range(n): + # A.foo(a) + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(32) + # ex = 
get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_LOAD_ATTR_CLASS", uops) + # + # def test_call_constant_propagate_through_frame(self): + # def testfunc(n): + # def dummy(x): + # return x+1 + # for i in range(n): + # dummy(1) + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_PUSH_FRAME", uops) + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + # + # + # def test_comprehension(self): + # def testfunc(n): + # for _ in range(n): + # return [i for i in range(n)] + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_truncated_zipfile(self): + import io + import zipfile + from random import random opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): - testfunc(32) - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) - - def test_frame_class_method(self): - class A: - def __init__(self): - self.a = 1 - def foo(self): - return self.a + FIXEDTEST_SIZE = 1000 + line_gen = [bytes("Zipfile test line %d. 
random float: %f\n" % + (i, random()), "ascii") + for i in range(FIXEDTEST_SIZE)] + + data = b''.join(line_gen) + compression = zipfile.ZIP_DEFLATED + fp = io.BytesIO() + with zipfile.ZipFile(fp, mode='w') as zipf: + zipf.writestr('strfile', data, compress_type=compression) + end_offset = fp.tell() + zipfiledata = fp.getvalue() + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + zipopen.read() + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + while zipopen.read(100): + pass + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + while zipopen.read1(100): + pass - def testfunc(n): - a = A() - for i in range(n): - A.foo(a) - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(32) - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_LOAD_ATTR_CLASS", uops) if __name__ == "__main__": unittest.main() diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index ebce3e85157824..8d4b65148a2c58 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -620,6 +620,10 @@ } case _UNPACK_SEQUENCE: { + for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } stack_pointer += -1 + oparg; break; } @@ -628,8 +632,8 @@ PyObject **__values_; __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { - __values_[case_gen_i] = sym_init_unknown(ctx); - if(__values_[case_gen_i] == NULL) goto error; + 
*(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; } stack_pointer += -1 + oparg; break; @@ -639,8 +643,8 @@ PyObject **__values_; __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { - __values_[case_gen_i] = sym_init_unknown(ctx); - if(__values_[case_gen_i] == NULL) goto error; + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; } stack_pointer += -1 + oparg; break; @@ -650,14 +654,22 @@ PyObject **__values_; __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { - __values_[case_gen_i] = sym_init_unknown(ctx); - if(__values_[case_gen_i] == NULL) goto error; + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; } stack_pointer += -1 + oparg; break; } case _UNPACK_EX: { + for (int case_gen_i = 0; case_gen_i < oparg & 0xFF; case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } + for (int case_gen_i = 0; case_gen_i < oparg >> 8; case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } stack_pointer += (oparg >> 8) + (oparg & 0xFF); break; } @@ -909,6 +921,7 @@ if(__attr_ == NULL) goto error; __self_or_null_ = sym_init_unknown(ctx); if(__self_or_null_ == NULL) goto error; + sym_set_type(__self_or_null_, NULL_TYPE, 0); stack_pointer[-1] = __attr_; if (oparg & 1) stack_pointer[0] = __self_or_null_; stack_pointer += (oparg & 1); @@ -1419,6 +1432,7 @@ _Py_UOpsSymbolicExpression *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; + sym_set_type(__next_, PYLONG_TYPE, 0); stack_pointer[0] = __next_; stack_pointer += 1; break; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index dd8e2b0269228e..501acaa28dec8d 100644 --- a/Python/bytecodes.c 
+++ b/Python/bytecodes.c @@ -1858,7 +1858,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_LOAD_ATTR, (owner -- attr, self_or_null if (oparg & 1))) { + op(_LOAD_ATTR, (owner -- attr, self_or_null: &NULL_TYPE if (oparg & 1))) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1); if (oparg & 1) { /* Designed to work in tandem with CALL, pushes two values. */ @@ -1876,7 +1876,7 @@ dummy_func( something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... | argN */ DECREF_INPUTS(); ERROR_IF(attr == NULL, error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index f2ba4fd981a040..4f9c786588d043 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1598,7 +1598,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... | argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error_tier_two; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 68468728d44bf8..4f93b47d596b26 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3440,7 +3440,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... 
| argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error; diff --git a/Python/optimizer.c b/Python/optimizer.c index f4dbb2dcd58ed6..e91fff3c5be452 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -802,10 +802,8 @@ uop_optimize( return err; } OPT_STAT_INC(traces_created); - err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); - if (err < 0) { - return -1; - } + // This clears its errors, so if it fails it just doesn't optimize. + _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 95727242c41e98..4c7cefe9558798 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -289,6 +289,7 @@ typedef struct sym_arena { typedef struct frequent_syms { _Py_UOpsSymbolicExpression *nulL_sym; + _Py_UOpsSymbolicExpression *push_nulL_sym; } frequent_syms; // Tier 2 types meta interpreter @@ -320,7 +321,7 @@ abstractinterp_dealloc(PyObject *o) { _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; Py_XDECREF(self->frame); - Py_DECREF(self->ir); + Py_XDECREF(self->ir); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -422,7 +423,7 @@ abstractinterp_context_new(PyCodeObject *co, // IR and sym setup self->ir = ir; self->frequent_syms.nulL_sym = NULL; - + self->frequent_syms.push_nulL_sym = NULL; return self; @@ -431,6 +432,8 @@ abstractinterp_context_new(PyCodeObject *co, if (self != NULL) { self->s_arena.arena = NULL; } + self->frame = NULL; + self->ir = NULL; Py_XDECREF(self); Py_XDECREF(ir); Py_XDECREF(frame); @@ -559,15 +562,25 @@ sym_type_get_refinement(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum static inline PyFunctionObject * extract_func_from_sym(_Py_UOpsSymbolicExpression *frame_sym) { +#ifdef Py_DEBUG +char *uop_debug = Py_GETENV(DEBUG_ENV); + int 
lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + DPRINTF(3, "write_stack_to_ir\n"); +#endif switch(frame_sym->inst.opcode) { case _INIT_CALL_PY_EXACT_ARGS: { _Py_UOpsSymbolicExpression *callable_sym = frame_sym->operands[0]; if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { + DPRINTF(2, "error: _PUSH_FRAME not function type\n"); return NULL; } uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); if (func == NULL) { + DPRINTF(2, "error: _PUSH_FRAME cannot find func version\n"); return NULL; } return func; @@ -824,13 +837,28 @@ sym_init_null(_Py_UOpsAbstractInterpContext *ctx) if (null_sym == NULL) { return NULL; } - null_sym->inst.opcode = PUSH_NULL; sym_set_type(null_sym, NULL_TYPE, 0); ctx->frequent_syms.nulL_sym = null_sym; return null_sym; } +static _Py_UOpsSymbolicExpression* +sym_init_push_null(_Py_UOpsAbstractInterpContext *ctx) +{ + if (ctx->frequent_syms.push_nulL_sym != NULL) { + return ctx->frequent_syms.push_nulL_sym; + } + _Py_UOpsSymbolicExpression *null_sym = sym_init_unknown(ctx); + if (null_sym == NULL) { + return NULL; + } + null_sym->inst.opcode = PUSH_NULL; + sym_set_type(null_sym, NULL_TYPE, 0); + ctx->frequent_syms.push_nulL_sym = null_sym; + return null_sym; +} + static inline bool sym_is_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ) { @@ -994,7 +1022,6 @@ GETITEM(_Py_UOpsAbstractInterpContext *ctx, Py_ssize_t i) { static int uop_abstract_interpret_single_inst( - PyCodeObject *co, _PyUOpInstruction *inst, _PyUOpInstruction *end, _Py_UOpsAbstractInterpContext *ctx @@ -1009,7 +1036,7 @@ uop_abstract_interpret_single_inst( #endif #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) -#define STACK_SIZE() (co->co_stacksize) +#define STACK_SIZE() (ctx->frame->stack_len) #define BASIC_STACKADJ(n) 
(stack_pointer += n) #ifdef Py_DEBUG @@ -1056,29 +1083,34 @@ uop_abstract_interpret_single_inst( case LOAD_FAST_CHECK: STACK_GROW(1); PEEK(1) = GETLOCAL(oparg); - // Value might be uninitialized, and might error. - if(PEEK(1)->inst.opcode == INIT_FAST) { - // In that case, to be safe, treat it as an impure region - ctx->curr_region_id++; - ctx->frame->stack_pointer = stack_pointer; - write_stack_to_ir(ctx, inst, true); - } + assert(PEEK(1)->inst.opcode == INIT_FAST); + PEEK(1)->inst.opcode = LOAD_FAST_CHECK; + ctx->frame->stack_pointer = stack_pointer; + write_stack_to_ir(ctx, inst, true); break; case LOAD_FAST: STACK_GROW(1); // Guaranteed by the CPython bytecode compiler to not be uninitialized. PEEK(1) = GETLOCAL(oparg); + if (sym_is_type(PEEK(1), NULL_TYPE)) { + PEEK(1)->inst.opcode = LOAD_FAST_CHECK; + } assert(PEEK(1)); break; case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); PEEK(1) = GETLOCAL(oparg); - _Py_UOpsSymbolicExpression *null_sym = sym_init_null(ctx); - if (null_sym == NULL) { + assert(PEEK(1)->inst.opcode == INIT_FAST); + PEEK(1)->inst.opcode = LOAD_FAST_AND_CLEAR; + ctx->frame->stack_pointer = stack_pointer; + write_stack_to_ir(ctx, inst, true); + _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, oparg); + if (new_local == NULL) { goto error; } - GETLOCAL(oparg) = null_sym; + sym_set_type(new_local, NULL_TYPE, 0); + GETLOCAL(oparg) = new_local; break; } case LOAD_CONST: { @@ -1101,6 +1133,8 @@ uop_abstract_interpret_single_inst( break; } case COPY: { + write_stack_to_ir(ctx, inst, true); + ir_plain_inst(ctx->ir, *inst); _Py_UOpsSymbolicExpression *bottom = PEEK(1 + (oparg - 1)); STACK_GROW(1); _Py_UOpsSymbolicExpression *temp = sym_init_unknown(ctx); @@ -1120,7 +1154,7 @@ uop_abstract_interpret_single_inst( case PUSH_NULL: { STACK_GROW(1); - _Py_UOpsSymbolicExpression *null_sym = sym_init_null(ctx); + _Py_UOpsSymbolicExpression *null_sym = sym_init_push_null(ctx); if (null_sym == NULL) { goto error; } @@ -1309,7 +1343,7 @@ 
uop_abstract_interpret( } status = uop_abstract_interpret_single_inst( - co, curr, end, ctx + curr, end, ctx ); if (status == ABSTRACT_INTERP_ERROR) { goto error; @@ -1635,6 +1669,9 @@ _Py_uop_analyze_and_optimize( return 0; error: + if (PyErr_Occurred()) { + PyErr_Clear(); + } PyMem_Free(temp_writebuffer); return -1; } \ No newline at end of file diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 985a37b5285968..8d64cd2297d79c 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -121,7 +121,7 @@ def _write_body_abstract_interp_impure_uop( # Simply make all outputs effects unknown for var in mangled_uop.stack.outputs: - if var.name in UNUSED or var.peek: + if (var.name in UNUSED and var.size == "1") or var.peek: continue if var.size == "1": @@ -129,12 +129,17 @@ def _write_body_abstract_interp_impure_uop( out.emit(f"if({var.name} == NULL) goto error;\n") if var.name in ("null", "__null_"): out.emit(f"sym_set_type({var.name}, NULL_TYPE, 0);\n") + elif var.type_prop: + out.emit(f"sym_set_type({var.name}, {var.type_prop[0]}, 0);\n") else: + # See UNPACK_SEQUENCE for when we need this. 
out.emit( f"for (int case_gen_i = 0; case_gen_i < {var.size}; case_gen_i++) {{\n" ) - out.emit(f"{var.name}[case_gen_i] = sym_init_unknown(ctx);\n") - out.emit(f"if({var.name}[case_gen_i] == NULL) goto error;\n") + out.emit(f"*(stack_pointer + case_gen_i) = sym_init_unknown(ctx);\n") + out.emit(f"if(*(stack_pointer + case_gen_i) == NULL) goto error;\n") + if var.type_prop: + out.emit(f"sym_set_type(*(stack_pointer + case_gen_i), {var.type_prop[0]}, 0);\n") out.emit("}\n") From 52e368f042506137fb0b35ccd7327845de5e5660 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 15 Jan 2024 02:21:33 +0800 Subject: [PATCH 007/111] properly handle runtime self_or_null --- Lib/test/test_capi/test_opt.py | 382 +++++++++++++++---------------- Python/abstract_interp_cases.c.h | 2 +- Python/bytecodes.c | 2 +- Python/optimizer_analysis.c | 9 +- 4 files changed, 200 insertions(+), 195 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b7dfcad7f20405..cec07f13768ae4 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -542,197 +542,197 @@ def testfunc(n): class TestUopsOptimization(unittest.TestCase): - # def test_int_constant_propagation(self): - # def testfunc(loops): - # num = 0 - # for _ in range(loops): - # x = 0 - # y = 1 - # a = x + y - # return 1 - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # self.assertEqual(res, 1) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 0) - # - # def test_int_type_propagation(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num - # a = x + 1 - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # 
res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # self.assertEqual(res, 127) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # - # def test_int_impure_region(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num - # y = 1 - # x // 2 - # a = x + y - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # - # def test_int_impure_region_attr(self): - # class A: - # foo = 1 - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = A.foo + A.foo - # y = 1 - # A.foo - # a = x + y - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # def test_int_large_pure_region(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num + num - num + num - num + num + num + num - num + num - num - # y = 1 - # a = x + num + num + num - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 11) - # - # def test_call_py_exact_args(self): - # def testfunc(n): - # def dummy(x): - # return x+1 - # for i in range(n): - # dummy(i) - # - # opt = 
_testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_PUSH_FRAME", uops) - # self.assertIn("_BINARY_OP_ADD_INT", uops) - # - # def test_frame_instance_method(self): - # class A: - # def __init__(self): - # self.a = 1 - # def foo(self): - # return self.a - # - # a = A() - # def testfunc(n): - # for i in range(n): - # a.foo() - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(32) - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) - # - # def test_frame_class_method(self): - # class A: - # def __init__(self): - # self.a = 1 - # def foo(self): - # return self.a - # - # def testfunc(n): - # a = A() - # for i in range(n): - # A.foo(a) - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(32) - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_LOAD_ATTR_CLASS", uops) - # - # def test_call_constant_propagate_through_frame(self): - # def testfunc(n): - # def dummy(x): - # return x+1 - # for i in range(n): - # dummy(1) - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_PUSH_FRAME", uops) - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) - # - # - # def test_comprehension(self): - # def testfunc(n): - # for _ in range(n): - # return [i for i in range(n)] - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = 
{opname for opname, _, _ in ex} - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + def test_int_constant_propagation(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + + def test_int_type_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = x + 1 + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 127) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + + def test_int_impure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + y = 1 + x // 2 + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + + def test_int_impure_region_attr(self): + class A: + foo = 1 + def testfunc(loops): + num = 0 + while num < loops: + x = A.foo + A.foo + y = 1 + A.foo + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + def 
test_int_large_pure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + num - num + num - num + num + num + num - num + num - num + y = 1 + a = x + num + num + num + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 11) + + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + + def test_frame_instance_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + a = A() + def testfunc(n): + for i in range(n): + a.foo() + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + + def test_frame_class_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + def testfunc(n): + a = A() + for i in range(n): + A.foo(a) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_CLASS", uops) + + def test_call_constant_propagate_through_frame(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(1) + + opt = _testinternalcapi.get_uop_optimizer() + with 
temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + + def test_comprehension(self): + def testfunc(n): + for _ in range(n): + return [i for i in range(n)] + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) def test_truncated_zipfile(self): import io diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 8d4b65148a2c58..9420c399761ebb 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -921,7 +921,7 @@ if(__attr_ == NULL) goto error; __self_or_null_ = sym_init_unknown(ctx); if(__self_or_null_ == NULL) goto error; - sym_set_type(__self_or_null_, NULL_TYPE, 0); + sym_set_type(__self_or_null_, SELF_OR_NULL, 0); stack_pointer[-1] = __attr_; if (oparg & 1) stack_pointer[0] = __self_or_null_; stack_pointer += (oparg & 1); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 501acaa28dec8d..0cb2fbbb08eea2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1858,7 +1858,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_LOAD_ATTR, (owner -- attr, self_or_null: &NULL_TYPE if (oparg & 1))) { + op(_LOAD_ATTR, (owner -- attr, self_or_null: &SELF_OR_NULL if (oparg & 1))) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1); if (oparg & 1) { /* Designed to work in tandem with CALL, pushes two values. 
*/ diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4c7cefe9558798..3c1dabe243b2ff 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -68,6 +68,8 @@ typedef enum { PYMETHOD_TYPE = 7, GUARD_DORV_VALUES_TYPE = 8, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE = 9, + // Can't statically determine if self or null. + SELF_OR_NULL = 10, INVALID_TYPE = 31, } _Py_UOpsSymExprTypeEnum; @@ -1198,8 +1200,11 @@ uop_abstract_interpret_single_inst( goto error; } stack_pointer = ctx->frame->stack_pointer; - for (int i = 0; i < argcount; i++) { - sym_copy_type(args[i], ctx->frame->locals[i]); + // Cannot determine statically, so we can't propagate types. + if (!sym_is_type(self_or_null, SELF_OR_NULL)) { + for (int i = 0; i < argcount; i++) { + sym_copy_type(args[i], ctx->frame->locals[i]); + } } break; } From a273a2f229ec8dfa1d2c261be88e0501c89f41e9 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 15 Jan 2024 02:33:27 +0800 Subject: [PATCH 008/111] fix faulty assertion --- Python/optimizer_analysis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 3c1dabe243b2ff..de3814fd157a95 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -740,7 +740,7 @@ sym_copy_type(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression * static void sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym) { - uint32_t immutables = (1 << NULL_TYPE | 1 << PYLONG_TYPE | 1 << PYFLOAT_TYPE | 1 << PYUNICODE_TYPE); + uint32_t immutables = (1 << NULL_TYPE | 1 << PYLONG_TYPE | 1 << PYFLOAT_TYPE | 1 << PYUNICODE_TYPE | 1 << SELF_OR_NULL); to_sym->sym_type.types = (from_sym->sym_type.types & immutables); Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val)); } @@ -1085,7 +1085,7 @@ uop_abstract_interpret_single_inst( case LOAD_FAST_CHECK: 
STACK_GROW(1); PEEK(1) = GETLOCAL(oparg); - assert(PEEK(1)->inst.opcode == INIT_FAST); + assert(PEEK(1)->inst.opcode == INIT_FAST || PEEK(1)->inst.opcode == LOAD_FAST_CHECK); PEEK(1)->inst.opcode = LOAD_FAST_CHECK; ctx->frame->stack_pointer = stack_pointer; write_stack_to_ir(ctx, inst, true); From 60a1d7997d4d1e4788ba61172f8a0103d0ad6243 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:07:16 +0800 Subject: [PATCH 009/111] fix build --- Makefile.pre.in | 2 +- Python/abstract_interp_cases.c.h | 76 +++++++++---------- .../tier2_abstract_generator.py | 8 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 6f19e0c513a2c9..a4862d24d105e1 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1603,7 +1603,7 @@ regen-cases: -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \ -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \ + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \ -o $(srcdir)/Python/abstract_interp_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \ -o $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(srcdir)/Python/bytecodes.c diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 9420c399761ebb..a416c89574a136 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -69,7 +69,7 @@ case _TO_BOOL_BOOL: { _Py_UOpsSymbolicExpression *__value_; __value_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__value_)) { PyObject *value; value = get_const(__value_); @@ -135,7 +135,7 @@ _Py_UOpsSymbolicExpression *__left_; __right_ = 
stack_pointer[-1]; __left_ = stack_pointer[-2]; - // Constant evaluation + // Constant evaluation if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; @@ -147,13 +147,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYLONG_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYLONG_TYPE, (uint32_t)0); sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYLONG_TYPE, (uint32_t)0); goto guard_required; @@ -262,7 +262,7 @@ _Py_UOpsSymbolicExpression *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; - // Constant evaluation + // Constant evaluation if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; @@ -274,13 +274,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYFLOAT_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYFLOAT_TYPE, (uint32_t)0); sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYFLOAT_TYPE, (uint32_t)0); goto guard_required; @@ -386,7 +386,7 @@ _Py_UOpsSymbolicExpression *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; - // Constant evaluation + // Constant evaluation if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; @@ -398,13 +398,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if 
(sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYUNICODE_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYUNICODE_TYPE, (uint32_t)0); sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYUNICODE_TYPE, (uint32_t)0); goto guard_required; @@ -932,7 +932,7 @@ _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -943,13 +943,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version); goto guard_required; } @@ -981,7 +981,7 @@ _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1014,7 +1014,7 @@ case _CHECK_ATTR_WITH_HINT: { _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1063,7 +1063,7 @@ _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1099,7 
+1099,7 @@ case _GUARD_DORV_VALUES: { _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1110,13 +1110,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0); goto guard_required; } @@ -1300,7 +1300,7 @@ case _ITER_CHECK_LIST: { _Py_UOpsSymbolicExpression *__iter_; __iter_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__iter_)) { PyObject *iter; iter = get_const(__iter_); @@ -1318,7 +1318,7 @@ case _GUARD_NOT_EXHAUSTED_LIST: { _Py_UOpsSymbolicExpression *__iter_; __iter_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__iter_)) { PyObject *iter; iter = get_const(__iter_); @@ -1347,7 +1347,7 @@ case _ITER_CHECK_TUPLE: { _Py_UOpsSymbolicExpression *__iter_; __iter_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__iter_)) { PyObject *iter; iter = get_const(__iter_); @@ -1365,7 +1365,7 @@ case _GUARD_NOT_EXHAUSTED_TUPLE: { _Py_UOpsSymbolicExpression *__iter_; __iter_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__iter_)) { PyObject *iter; iter = get_const(__iter_); @@ -1394,7 +1394,7 @@ case _ITER_CHECK_RANGE: { _Py_UOpsSymbolicExpression *__iter_; __iter_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__iter_)) { PyObject *iter; iter = get_const(__iter_); @@ -1413,7 +1413,7 @@ case _GUARD_NOT_EXHAUSTED_RANGE: { _Py_UOpsSymbolicExpression *__iter_; __iter_ = stack_pointer[-1]; - // Constant 
evaluation + // Constant evaluation if (is_const(__iter_)) { PyObject *iter; iter = get_const(__iter_); @@ -1491,7 +1491,7 @@ case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1502,13 +1502,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0); goto guard_required; } @@ -1519,7 +1519,7 @@ _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1530,13 +1530,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version); goto guard_required; } @@ -1590,7 +1590,7 @@ case _CHECK_ATTR_METHOD_LAZY_DICT: { _Py_UOpsSymbolicExpression *__owner_; __owner_ = stack_pointer[-1]; - // Constant evaluation + // Constant evaluation if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1629,7 +1629,7 @@ _Py_UOpsSymbolicExpression *__callable_; __null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; - // 
Constant evaluation + // Constant evaluation if (is_const(__callable_) && is_const(__null_)) { PyObject *null; PyObject *callable; @@ -1641,13 +1641,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__null_, NULL_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__callable_, PYMETHOD_TYPE, (uint32_t)0); sym_set_type((_Py_UOpsSymbolicExpression *)__null_, NULL_TYPE, (uint32_t)0); goto guard_required; @@ -1677,7 +1677,7 @@ __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation + // Constant evaluation if (is_const(__callable_) && is_const(__self_or_null_)) { PyObject *self_or_null; PyObject *callable; @@ -1692,13 +1692,13 @@ DPRINTF(3, "const eliminated guard\n"); break; } - // Type guard elimination + // Type guard elimination if (sym_matches_type((_Py_UOpsSymbolicExpression *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)){ DPRINTF(2, "type propagation eliminated guard\n"); break; } else { - // Type propagation + // Type propagation sym_set_type((_Py_UOpsSymbolicExpression *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version); goto guard_required; } diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 8d64cd2297d79c..4c97474220d3f2 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -159,7 +159,7 @@ def mangle_uop_names(uop: Uop) -> Uop: # Returns a tuple of a pointer to an array of subexpressions, the length of said array # and a string containing the join of all 
other subexpressions obtained from stack input. # This grabs variadic inputs that depend on things like oparg or cache -def get_subexpressions(input_vars: list[StackItem]) -> tuple[str, int, str]: +def get_subexpressions(input_vars: list[StackItem]) -> tuple[str | None, int, str]: arr_var = [(var.name, var) for var in input_vars if var.size > "1"] assert len(arr_var) <= 1, "Can have at most one array input from oparg/cache" arr_var_name = arr_var[0][0] if len(arr_var) == 1 else None @@ -272,7 +272,7 @@ def _write_body_abstract_interp_guard_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") - out.emit("// Constant evaluation \n") + out.emit("// Constant evaluation\n") predicates_str = " && ".join( [ f"is_const({var.name})" @@ -325,14 +325,14 @@ def _write_body_abstract_interp_guard_uop( f"sym_set_type((_Py_UOpsSymbolicExpression *){output_var.name}, {typname}, (uint32_t){aux})" ) - out.emit("// Type guard elimination \n") + out.emit("// Type guard elimination\n") out.emit(f"if ({' && '.join(predicates)}){{\n") out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') out.emit("break;\n") out.emit("}\n") # Else we need the guard out.emit("else {\n") - out.emit("// Type propagation \n") + out.emit("// Type propagation\n") for prop in propagates: out.emit(f"{prop};\n") out.emit("goto guard_required;\n") From 7077ad5c5bda2304f6cd05aa4191894571c40d90 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:34:51 +0800 Subject: [PATCH 010/111] fix all tests except test_capi and maybe test_ctypes --- Include/internal/pycore_uop_metadata.h | 4 +- Lib/test/test_capi/test_opt.py | 500 ++++++++++-------- Python/abstract_interp_cases.c.h | 8 - Python/optimizer.c | 10 +- Python/optimizer_analysis.c | 84 +-- Tools/cases_generator/analyzer.py | 3 +- .../cases_generator/tier2_abstract_common.py | 21 + .../tier2_abstract_generator.py | 18 +- 8 files changed, 347 
insertions(+), 301 deletions(-) create mode 100644 Tools/cases_generator/tier2_abstract_common.py diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 09379f361fdf4f..25fca32a408990 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -198,10 +198,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG, [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG, - [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, - [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, + [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE_BORROW] = 0, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_LOAD_CONST_INLINE] = 0, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index cec07f13768ae4..49f461253905c8 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -5,6 +5,8 @@ import _testinternalcapi +from test.support.script_helper import assert_python_ok +from test import support @contextlib.contextmanager def temporary_optimizer(opt): @@ -542,239 +544,279 @@ def testfunc(n): class TestUopsOptimization(unittest.TestCase): - def test_int_constant_propagation(self): - def testfunc(loops): - num = 0 - for _ in range(loops): - x = 0 - y = 1 - a = x + y - return 1 + # def test_int_constant_propagation(self): + # def testfunc(loops): + # num = 0 + # for _ in range(loops): + # x = 0 + # y = 1 + # a = x + y + # return 1 + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # self.assertEqual(res, 1) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # 
self.assertEqual(len(binop_count), 0) + # + # def test_int_type_propagation(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + # a = x + 1 + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # self.assertEqual(res, 127) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # + # def test_int_impure_region(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + # y = 1 + # x // 2 + # a = x + y + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # + # def test_int_impure_region_attr(self): + # class A: + # foo = 1 + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = A.foo + A.foo + # y = 1 + # A.foo + # a = x + y + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # def test_int_large_pure_region(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + num - num + num - num + num + num + num - num + num - num + # y = 1 + # a = x + num + num + num + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = 
get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 11) + # + # def test_call_py_exact_args(self): + # def testfunc(n): + # def dummy(x): + # return x+1 + # for i in range(n): + # dummy(i) + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_PUSH_FRAME", uops) + # self.assertIn("_BINARY_OP_ADD_INT", uops) + # + # def test_frame_instance_method(self): + # class A: + # def __init__(self): + # self.a = 1 + # def foo(self): + # return self.a + # + # a = A() + # def testfunc(n): + # for i in range(n): + # a.foo() + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(32) + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + # + # def test_frame_class_method(self): + # class A: + # def __init__(self): + # self.a = 1 + # def foo(self): + # return self.a + # + # def testfunc(n): + # a = A() + # for i in range(n): + # A.foo(a) + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(32) + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_LOAD_ATTR_CLASS", uops) + # + # def test_call_constant_propagate_through_frame(self): + # def testfunc(n): + # def dummy(x): + # return x+1 + # for i in range(n): + # x = dummy(3) + # return x + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # res = testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertEqual(res, 4) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # 
self.assertIn("_PUSH_FRAME", uops) + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + # + # + # def test_comprehension(self): + # def testfunc(n): + # for _ in range(n): + # return [i for i in range(n)] + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + # + # def test_truncated_zipfile(self): + # import io + # import zipfile + # from random import random + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # FIXEDTEST_SIZE = 1000 + # line_gen = [bytes("Zipfile test line %d. random float: %f\n" % + # (i, random()), "ascii") + # for i in range(FIXEDTEST_SIZE)] + # + # data = b''.join(line_gen) + # compression = zipfile.ZIP_DEFLATED + # fp = io.BytesIO() + # with zipfile.ZipFile(fp, mode='w') as zipf: + # zipf.writestr('strfile', data, compress_type=compression) + # end_offset = fp.tell() + # zipfiledata = fp.getvalue() + # + # fp = io.BytesIO(zipfiledata) + # with zipfile.ZipFile(fp) as zipf: + # with zipf.open('strfile') as zipopen: + # fp.truncate(end_offset - 20) + # with self.assertRaises(EOFError): + # zipopen.read() + # + # fp = io.BytesIO(zipfiledata) + # with zipfile.ZipFile(fp) as zipf: + # with zipf.open('strfile') as zipopen: + # fp.truncate(end_offset - 20) + # with self.assertRaises(EOFError): + # while zipopen.read(100): + # pass + # + # fp = io.BytesIO(zipfiledata) + # with zipfile.ZipFile(fp) as zipf: + # with zipf.open('strfile') as zipopen: + # fp.truncate(end_offset - 20) + # with self.assertRaises(EOFError): + # while zipopen.read1(100): + # pass + @unittest.skipIf(support.Py_TRACE_REFS, 'cannot test Py_TRACE_REFS build') + def test_set_nomemory(self): + def foo(): + res = [] + if 1: + import _testcapi + import sys + + class C(): pass + + # The first loop tests both functions and that 
remove_mem_hooks() + # can be called twice in a row. The second loop checks a call to + # set_nomemory() after a call to remove_mem_hooks(). The third + # loop checks the start and stop arguments of set_nomemory(). + for outer_cnt in range(1, 4): + start = 10 * outer_cnt + for j in range(100): + if j == 0: + if outer_cnt != 3: + _testcapi.set_nomemory(start) + else: + _testcapi.set_nomemory(start, start + 1) + try: + C() + except MemoryError as e: + if outer_cnt != 3: + _testcapi.remove_mem_hooks() + res.append((outer_cnt, j)) + _testcapi.remove_mem_hooks() + break + return res opt = _testinternalcapi.get_uop_optimizer() res = None with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 1) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 0) - - def test_int_type_propagation(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num - a = x + 1 - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 127) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - - def test_int_impure_region(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num - y = 1 - x // 2 - a = x + y - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - - def test_int_impure_region_attr(self): - class A: - foo = 1 - def testfunc(loops): - num = 0 - while num < loops: - x = A.foo + A.foo - y = 1 - A.foo - 
a = x + y - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - def test_int_large_pure_region(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num + num - num + num - num + num + num + num - num + num - num - y = 1 - a = x + num + num + num - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 11) - - def test_call_py_exact_args(self): - def testfunc(n): - def dummy(x): - return x+1 - for i in range(n): - dummy(i) - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(20) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_PUSH_FRAME", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) - - def test_frame_instance_method(self): - class A: - def __init__(self): - self.a = 1 - def foo(self): - return self.a - - a = A() - def testfunc(n): - for i in range(n): - a.foo() - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(32) - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) - - def test_frame_class_method(self): - class A: - def __init__(self): - self.a = 1 - def foo(self): - return self.a - - def testfunc(n): - a = A() - for i in range(n): - A.foo(a) - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(32) - ex = 
get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_LOAD_ATTR_CLASS", uops) - - def test_call_constant_propagate_through_frame(self): - def testfunc(n): - def dummy(x): - return x+1 - for i in range(n): - dummy(1) - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(20) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_PUSH_FRAME", uops) - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - - - def test_comprehension(self): - def testfunc(n): - for _ in range(n): - return [i for i in range(n)] - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(20) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - - def test_truncated_zipfile(self): - import io - import zipfile - from random import random - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - FIXEDTEST_SIZE = 1000 - line_gen = [bytes("Zipfile test line %d. 
random float: %f\n" % - (i, random()), "ascii") - for i in range(FIXEDTEST_SIZE)] - - data = b''.join(line_gen) - compression = zipfile.ZIP_DEFLATED - fp = io.BytesIO() - with zipfile.ZipFile(fp, mode='w') as zipf: - zipf.writestr('strfile', data, compress_type=compression) - end_offset = fp.tell() - zipfiledata = fp.getvalue() - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - zipopen.read() - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - while zipopen.read(100): - pass - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - while zipopen.read1(100): - pass + res = foo() + for (cnt, j) in res: + self.assertEqual(j, cnt * 5) if __name__ == "__main__": diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a416c89574a136..016e1c956f5a61 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1958,10 +1958,6 @@ break; } - case _SET_IP: { - break; - } - case _SAVE_RETURN_OFFSET: { break; } @@ -1970,10 +1966,6 @@ break; } - case _CHECK_VALIDITY: { - break; - } - case _LOAD_CONST_INLINE_BORROW: { _Py_UOpsSymbolicExpression *__value_; __value_ = sym_init_unknown(ctx); diff --git a/Python/optimizer.c b/Python/optimizer.c index e91fff3c5be452..82bba84fb71f69 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -325,10 +325,11 @@ BRANCH_TO_GUARD[4][2] = { #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \ DPRINTF(2, \ - " ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \ + " ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \ _PyUOpName(OPCODE), \ (OPARG), \ - (uint64_t)(OPERAND)); \ + (uint64_t)(OPERAND), \ + TARGET); \ assert(trace_length < 
max_length); \ trace[trace_length].opcode = (OPCODE); \ trace[trace_length].oparg = (OPARG); \ @@ -803,7 +804,10 @@ uop_optimize( } OPT_STAT_INC(traces_created); // This clears its errors, so if it fails it just doesn't optimize. - _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); + err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); + if (err < 0) { + return -1; + } _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index de3814fd157a95..4f18c29606fd19 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -38,7 +38,6 @@ _PyOpcode_isterminal(uint32_t opcode) opcode == _LOAD_FAST_CHECK || opcode == _LOAD_FAST_AND_CLEAR || opcode == INIT_FAST || - opcode == LOAD_CONST || opcode == CACHE || opcode == PUSH_NULL); } @@ -700,10 +699,6 @@ _Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, va_start(curr, num_subexprs); for (; i < num_subexprs; i++) { - // Note: no incref here. symexprs are kept alive by the global expression - // table. - // We intentionally don't want to hold a reference to it so we don't - // need GC. operands[i] = va_arg(curr, _Py_UOpsSymbolicExpression *); assert(operands[i]); } @@ -961,10 +956,10 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b ctx->frame->stack[i] = new_stack; } // Write bookkeeping ops, but don't write duplicates. 
- if((curr-1)->opcode == _CHECK_VALIDITY && (curr-2)->opcode == _SET_IP) { - ir_plain_inst(ctx->ir, *(curr-2)); - ir_plain_inst(ctx->ir, *(curr-1)); - } +// if((curr-1)->opcode == _CHECK_VALIDITY && (curr-2)->opcode == _SET_IP) { +// ir_plain_inst(ctx->ir, *(curr-2)); +// ir_plain_inst(ctx->ir, *(curr-1)); +// } return 0; error: @@ -1076,7 +1071,7 @@ uop_abstract_interpret_single_inst( _Py_UOpsSymbolicExpression **stack_pointer = ctx->frame->stack_pointer; - DPRINTF(2, "Abstract interpreting %s:%d ", + DPRINTF(3, "Abstract interpreting %s:%d ", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], oparg); switch (opcode) { @@ -1097,6 +1092,7 @@ uop_abstract_interpret_single_inst( if (sym_is_type(PEEK(1), NULL_TYPE)) { PEEK(1)->inst.opcode = LOAD_FAST_CHECK; } + PEEK(1)->inst.target = inst->target; assert(PEEK(1)); break; @@ -1260,6 +1256,11 @@ uop_abstract_interpret_single_inst( stack_pointer[-1] = new_bottom; break; } + case _SET_IP: + case _CHECK_VALIDITY: + write_stack_to_ir(ctx, inst, true); + ir_plain_inst(ctx->ir, *inst); + break; default: DPRINTF(1, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); @@ -1274,7 +1275,7 @@ uop_abstract_interpret_single_inst( } DPRINTF(3, "} \n"); } - DPRINTF(2, " stack_level %d\n", STACK_LEVEL()); + DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); @@ -1288,7 +1289,7 @@ uop_abstract_interpret_single_inst( return ABSTRACT_INTERP_ERROR; guard_required: - DPRINTF(2, " stack_level %d\n", STACK_LEVEL()); + DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); @@ -1334,7 +1335,7 @@ uop_abstract_interpret( !op_is_specially_handled(curr->opcode) && !op_is_bookkeeping(curr->opcode) && !op_is_passthrough(curr->opcode)) { - DPRINTF(2, "Impure %s\n", (curr->opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[curr->opcode]); + DPRINTF(3, "Impure %s\n", (curr->opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[curr->opcode]); if (first_impure) { write_stack_to_ir(ctx, curr, false); clear_locals_type_info(ctx); @@ -1354,7 +1355,7 @@ uop_abstract_interpret( goto error; } else if (status == ABSTRACT_INTERP_GUARD_REQUIRED) { - DPRINTF(2, "GUARD\n"); + DPRINTF(3, "GUARD\n"); // Emit the state of the stack first. // Since this is a guard, copy over the type info write_stack_to_ir(ctx, curr, true); @@ -1404,10 +1405,11 @@ emit_i(_Py_UOpsEmitter *emitter, DPRINTF(2, "out of emission space\n"); return -1; } - DPRINTF(3, "Emitting instruction at [%d] op: %s, oparg: %d, operand: %" PRIu64 " \n", + DPRINTF(2, "Emitting instruction at [%d] op: %s, oparg: %d, target: %d, operand: %" PRIu64 " \n", emitter->curr_i, (inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[inst.opcode], inst.oparg, + inst.target, inst.operand); emitter->writebuffer[emitter->curr_i] = inst; emitter->curr_i++; @@ -1431,13 +1433,24 @@ count_stack_operands(_Py_UOpsSymbolicExpression *sym) static int compile_sym_to_uops(_Py_UOpsEmitter *emitter, _Py_UOpsSymbolicExpression *sym, - _Py_UOpsAbstractInterpContext *ctx, - bool do_cse) + _Py_UOpsAbstractInterpContext *ctx) { - _PyUOpInstruction inst; + _PyUOpInstruction inst = sym->inst;; // Since CPython is a stack machine, just compile in the order // seen in the operands, then the instruction itself. + if (_PyOpcode_isterminal(sym->inst.opcode)) { + // These are for unknown stack entries. + if (_PyOpcode_isstackvalue(sym->inst.opcode)) { + // Leave it be. These are initial values from the start + return 0; + } + if (sym->inst.opcode == INIT_FAST) { + inst.opcode = LOAD_FAST; + } + return emit_i(emitter, inst); + } + // Constant propagated value, load immediate constant if (sym->const_val != NULL && !_PyOpcode_isstackvalue(sym->inst.opcode)) { // Shrink the stack if operands consist of stack values. 
@@ -1455,25 +1468,12 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, } inst.opcode = _LOAD_CONST_INLINE; - inst.oparg = 0; + inst.oparg = sym->inst.oparg; // TODO memory leak. inst.operand = (uint64_t)Py_NewRef(sym->const_val); return emit_i(emitter, inst); } - if (_PyOpcode_isterminal(sym->inst.opcode)) { - // These are for unknown stack entries. - if (_PyOpcode_isstackvalue(sym->inst.opcode)) { - // Leave it be. These are initial values from the start - return 0; - } - inst = sym->inst; - if (sym->inst.opcode == INIT_FAST) { - inst.opcode = LOAD_FAST; - } - return emit_i(emitter, inst); - } - // Compile each operand Py_ssize_t operands_count = sym->operand_count; for (Py_ssize_t i = 0; i < operands_count; i++) { @@ -1484,7 +1484,7 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, if (compile_sym_to_uops( emitter, sym->operands[i], - ctx, true) < 0) { + ctx) < 0) { return -1; } } @@ -1537,7 +1537,7 @@ emit_uops_from_ctx( (curr->expr->inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[curr->expr->inst.opcode], curr->expr->inst.oparg, (void *)curr->expr->inst.operand); - if (compile_sym_to_uops(&emitter, curr->expr, ctx, true) < 0) { + if (compile_sym_to_uops(&emitter, curr->expr, ctx) < 0) { goto error; } // Anything less means no assignment target at all. @@ -1633,6 +1633,7 @@ _Py_uop_analyze_and_optimize( ) { _PyUOpInstruction *temp_writebuffer = NULL; + bool err_occurred = false; temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size * OVERALLOCATE_FACTOR); if (temp_writebuffer == NULL) { @@ -1662,9 +1663,6 @@ _Py_uop_analyze_and_optimize( goto error; } - // Pass: fix up side exit stubs. This MUST be called as the last pass! - // trace_len = copy_over_exit_stubs(buffer, original_trace_len, temp_writebuffer, trace_len); - // Fill in our new trace! 
memcpy(buffer, temp_writebuffer, buffer_size * sizeof(_PyUOpInstruction)); @@ -1674,9 +1672,13 @@ _Py_uop_analyze_and_optimize( return 0; error: - if (PyErr_Occurred()) { - PyErr_Clear(); - } + // The only valid error we can raise is MemoryError. + // Other times it's not really errors but things like not being able + // to fetch a function version because the function got deleted. + err_occurred = PyErr_Occurred(); +// if (err_occurred && !PyErr_ExceptionMatches(PyExc_MemoryError)) { +// PyErr_Clear(); +// } PyMem_Free(temp_writebuffer); - return -1; + return err_occurred ? -1 : 0; } \ No newline at end of file diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 976c72bfdca684..b868a3e8b6c63e 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -3,6 +3,7 @@ import parser from typing import Optional +from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR @dataclass class Properties: @@ -449,8 +450,6 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: def compute_properties(op: parser.InstDef) -> Properties: - # Importing here to avoid a circular import. - from tier2_abstract_generator import SPECIALLY_HANDLED_ABSTRACT_INSTR has_free = ( variable_used(op, "PyCell_New") diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py new file mode 100644 index 00000000000000..5a35a50adaddc3 --- /dev/null +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -0,0 +1,21 @@ +# We have to keep this here instead of tier2_abstract_generator.py +# to avoid a circular import. 
+SPECIALLY_HANDLED_ABSTRACT_INSTR = { + "LOAD_FAST", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", + "POP_TOP", + "PUSH_NULL", + "SWAP", + # Frame stuff + "_PUSH_FRAME", + "_POP_FRAME", + # Bookkeeping + "_SET_IP", + "_CHECK_VALIDITY", +} + diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 4c97474220d3f2..61632173644483 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -26,6 +26,7 @@ emit_to, REPLACEMENT_FUNCTIONS, ) +from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR from tier2_generator import tier2_replace_error from cwriter import CWriter from typing import TextIO, Iterator @@ -34,21 +35,6 @@ DEFAULT_OUTPUT = ROOT / "Python/abstract_interp_cases.c.h" -SPECIALLY_HANDLED_ABSTRACT_INSTR = { - "LOAD_FAST", - "LOAD_FAST_CHECK", - "LOAD_FAST_AND_CLEAR", - "LOAD_CONST", - "STORE_FAST", - "STORE_FAST_MAYBE_NULL", - "COPY", - "POP_TOP", - "PUSH_NULL", - "SWAP", - # Frame stuff - "_PUSH_FRAME", - "_POP_FRAME", -} NO_CONST_OR_TYPE_EVALUATE = { "_RESUME_CHECK", @@ -159,7 +145,7 @@ def mangle_uop_names(uop: Uop) -> Uop: # Returns a tuple of a pointer to an array of subexpressions, the length of said array # and a string containing the join of all other subexpressions obtained from stack input. 
# This grabs variadic inputs that depend on things like oparg or cache -def get_subexpressions(input_vars: list[StackItem]) -> tuple[str | None, int, str]: +def get_subexpressions(input_vars: list[StackItem]) -> tuple[str | None, int | str, str]: arr_var = [(var.name, var) for var in input_vars if var.size > "1"] assert len(arr_var) <= 1, "Can have at most one array input from oparg/cache" arr_var_name = arr_var[0][0] if len(arr_var) == 1 else None From 5169bf3535b116f2b63047b04b7a7eabc015d374 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:39:34 +0800 Subject: [PATCH 011/111] run black and re-enable tests --- Lib/test/test_capi/test_opt.py | 501 ++++++++---------- Tools/cases_generator/analyzer.py | 2 +- .../cases_generator/tier2_abstract_common.py | 1 - .../tier2_abstract_generator.py | 8 +- 4 files changed, 239 insertions(+), 273 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 49f461253905c8..e9012659542c53 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -544,279 +544,242 @@ def testfunc(n): class TestUopsOptimization(unittest.TestCase): - # def test_int_constant_propagation(self): - # def testfunc(loops): - # num = 0 - # for _ in range(loops): - # x = 0 - # y = 1 - # a = x + y - # return 1 - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # self.assertEqual(res, 1) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 0) - # - # def test_int_type_propagation(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num - # a = x + 1 - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = 
testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # self.assertEqual(res, 127) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # - # def test_int_impure_region(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num - # y = 1 - # x // 2 - # a = x + y - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # - # def test_int_impure_region_attr(self): - # class A: - # foo = 1 - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = A.foo + A.foo - # y = 1 - # A.foo - # a = x + y - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # def test_int_large_pure_region(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num + num - num + num - num + num + num + num - num + num - num - # y = 1 - # a = x + num + num + num - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 11) - # - # def test_call_py_exact_args(self): - # def testfunc(n): - # def dummy(x): - # return x+1 - # for i in range(n): - # dummy(i) - # - # opt = 
_testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_PUSH_FRAME", uops) - # self.assertIn("_BINARY_OP_ADD_INT", uops) - # - # def test_frame_instance_method(self): - # class A: - # def __init__(self): - # self.a = 1 - # def foo(self): - # return self.a - # - # a = A() - # def testfunc(n): - # for i in range(n): - # a.foo() - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(32) - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) - # - # def test_frame_class_method(self): - # class A: - # def __init__(self): - # self.a = 1 - # def foo(self): - # return self.a - # - # def testfunc(n): - # a = A() - # for i in range(n): - # A.foo(a) - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(32) - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_LOAD_ATTR_CLASS", uops) - # - # def test_call_constant_propagate_through_frame(self): - # def testfunc(n): - # def dummy(x): - # return x+1 - # for i in range(n): - # x = dummy(3) - # return x - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # res = testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertEqual(res, 4) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_PUSH_FRAME", uops) - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) - # - # - # def test_comprehension(self): - # def testfunc(n): - # for _ in range(n): - # return [i for i in range(n)] - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(20) - # - # ex = 
get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) - # - # def test_truncated_zipfile(self): - # import io - # import zipfile - # from random import random - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # FIXEDTEST_SIZE = 1000 - # line_gen = [bytes("Zipfile test line %d. random float: %f\n" % - # (i, random()), "ascii") - # for i in range(FIXEDTEST_SIZE)] - # - # data = b''.join(line_gen) - # compression = zipfile.ZIP_DEFLATED - # fp = io.BytesIO() - # with zipfile.ZipFile(fp, mode='w') as zipf: - # zipf.writestr('strfile', data, compress_type=compression) - # end_offset = fp.tell() - # zipfiledata = fp.getvalue() - # - # fp = io.BytesIO(zipfiledata) - # with zipfile.ZipFile(fp) as zipf: - # with zipf.open('strfile') as zipopen: - # fp.truncate(end_offset - 20) - # with self.assertRaises(EOFError): - # zipopen.read() - # - # fp = io.BytesIO(zipfiledata) - # with zipfile.ZipFile(fp) as zipf: - # with zipf.open('strfile') as zipopen: - # fp.truncate(end_offset - 20) - # with self.assertRaises(EOFError): - # while zipopen.read(100): - # pass - # - # fp = io.BytesIO(zipfiledata) - # with zipfile.ZipFile(fp) as zipf: - # with zipf.open('strfile') as zipopen: - # fp.truncate(end_offset - 20) - # with self.assertRaises(EOFError): - # while zipopen.read1(100): - # pass - @unittest.skipIf(support.Py_TRACE_REFS, 'cannot test Py_TRACE_REFS build') - def test_set_nomemory(self): - def foo(): - res = [] - if 1: - import _testcapi - import sys - - class C(): pass - - # The first loop tests both functions and that remove_mem_hooks() - # can be called twice in a row. The second loop checks a call to - # set_nomemory() after a call to remove_mem_hooks(). The third - # loop checks the start and stop arguments of set_nomemory(). 
- for outer_cnt in range(1, 4): - start = 10 * outer_cnt - for j in range(100): - if j == 0: - if outer_cnt != 3: - _testcapi.set_nomemory(start) - else: - _testcapi.set_nomemory(start, start + 1) - try: - C() - except MemoryError as e: - if outer_cnt != 3: - _testcapi.remove_mem_hooks() - res.append((outer_cnt, j)) - _testcapi.remove_mem_hooks() - break - return res + def test_int_constant_propagation(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 opt = _testinternalcapi.get_uop_optimizer() res = None with temporary_optimizer(opt): - res = foo() - for (cnt, j) in res: - self.assertEqual(j, cnt * 5) + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + + def test_int_type_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = x + 1 + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 127) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + + def test_int_impure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + y = 1 + x // 2 + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + + def test_int_impure_region_attr(self): + class A: + foo = 1 + def testfunc(loops): + num = 0 + while num < loops: + x = A.foo + A.foo + y = 1 + A.foo + a = x + 
y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 3) + def test_int_large_pure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + num - num + num - num + num + num + num - num + num - num + y = 1 + a = x + num + num + num + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 11) + + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + + def test_frame_instance_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + a = A() + def testfunc(n): + for i in range(n): + a.foo() + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + + def test_frame_class_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + def testfunc(n): + a = A() + for i in range(n): + A.foo(a) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = 
get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_CLASS", uops) + + def test_call_constant_propagate_through_frame(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + x = dummy(3) + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + + def test_comprehension(self): + def testfunc(n): + for _ in range(n): + return [i for i in range(n)] + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_truncated_zipfile(self): + import io + import zipfile + from random import random + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + FIXEDTEST_SIZE = 1000 + line_gen = [bytes("Zipfile test line %d. 
random float: %f\n" % + (i, random()), "ascii") + for i in range(FIXEDTEST_SIZE)] + + data = b''.join(line_gen) + compression = zipfile.ZIP_DEFLATED + fp = io.BytesIO() + with zipfile.ZipFile(fp, mode='w') as zipf: + zipf.writestr('strfile', data, compress_type=compression) + end_offset = fp.tell() + zipfiledata = fp.getvalue() + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + zipopen.read() + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + while zipopen.read(100): + pass + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + while zipopen.read1(100): + pass + if __name__ == "__main__": diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b868a3e8b6c63e..9f1067ff3311e0 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -5,6 +5,7 @@ from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR + @dataclass class Properties: escapes: bool @@ -450,7 +451,6 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: def compute_properties(op: parser.InstDef) -> Properties: - has_free = ( variable_used(op, "PyCell_New") or variable_used(op, "PyCell_GET") diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py index 5a35a50adaddc3..b5af7222762225 100644 --- a/Tools/cases_generator/tier2_abstract_common.py +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -18,4 +18,3 @@ "_SET_IP", "_CHECK_VALIDITY", } - diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 61632173644483..93bfa1b87d166c 100644 
--- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -125,7 +125,9 @@ def _write_body_abstract_interp_impure_uop( out.emit(f"*(stack_pointer + case_gen_i) = sym_init_unknown(ctx);\n") out.emit(f"if(*(stack_pointer + case_gen_i) == NULL) goto error;\n") if var.type_prop: - out.emit(f"sym_set_type(*(stack_pointer + case_gen_i), {var.type_prop[0]}, 0);\n") + out.emit( + f"sym_set_type(*(stack_pointer + case_gen_i), {var.type_prop[0]}, 0);\n" + ) out.emit("}\n") @@ -145,7 +147,9 @@ def mangle_uop_names(uop: Uop) -> Uop: # Returns a tuple of a pointer to an array of subexpressions, the length of said array # and a string containing the join of all other subexpressions obtained from stack input. # This grabs variadic inputs that depend on things like oparg or cache -def get_subexpressions(input_vars: list[StackItem]) -> tuple[str | None, int | str, str]: +def get_subexpressions( + input_vars: list[StackItem], +) -> tuple[str | None, int | str, str]: arr_var = [(var.name, var) for var in input_vars if var.size > "1"] assert len(arr_var) <= 1, "Can have at most one array input from oparg/cache" arr_var_name = arr_var[0][0] if len(arr_var) == 1 else None From 0929bb8b15a2b5134ed2957c3e00fd8deecccf2f Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 16 Jan 2024 14:41:58 +0000 Subject: [PATCH 012/111] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by?= =?UTF-8?q?=20blurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst new file mode 100644 index 
00000000000000..32d98c1a1a8d0a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst @@ -0,0 +1 @@ +Enable the tier 2 optimizer for all uops. From 7d66440d14b3d0f3a8d295fe6f3721788c95ada8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 19 Jan 2024 06:24:24 +0800 Subject: [PATCH 013/111] check for buffer overruns --- Python/optimizer_analysis.c | 120 +++++++++++++++++++++++++++--------- 1 file changed, 91 insertions(+), 29 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4f18c29606fd19..603d94557da359 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -161,7 +161,7 @@ static PyTypeObject _Py_UOps_Opt_IR_Type = { .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; -static void +static int ir_store(_Py_UOps_Opt_IR *ir, _Py_UOpsSymbolicExpression *expr, _Py_UOps_IRStore_IdKind store_fast_idx) { // Don't store stuff we know will never get compiled. 
@@ -184,9 +184,13 @@ ir_store(_Py_UOps_Opt_IR *ir, _Py_UOpsSymbolicExpression *expr, _Py_UOps_IRStore entry->assignment_target = store_fast_idx; entry->expr = expr; ir->curr_write++; + if (ir->curr_write >= Py_SIZE(ir)) { + return -1; + } + return 0; } -static void +static int ir_plain_inst(_Py_UOps_Opt_IR *ir, _PyUOpInstruction inst) { #ifdef Py_DEBUG @@ -204,6 +208,10 @@ ir_plain_inst(_Py_UOps_Opt_IR *ir, _PyUOpInstruction inst) entry->typ = IR_PLAIN_INST; entry->inst = inst; ir->curr_write++; + if (ir->curr_write >= Py_SIZE(ir)) { + return -1; + } + return 0; } static _Py_UOpsOptIREntry * @@ -222,11 +230,14 @@ ir_frame_push_info(_Py_UOps_Opt_IR *ir) entry->my_virtual_localsplus = NULL; entry->prev_frame_ir = NULL; ir->curr_write++; + if (ir->curr_write >= Py_SIZE(ir)) { + return NULL; + } return entry; } -static void +static int ir_frame_pop_info(_Py_UOps_Opt_IR *ir) { #ifdef Py_DEBUG @@ -240,6 +251,10 @@ ir_frame_pop_info(_Py_UOps_Opt_IR *ir) _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; entry->typ = IR_FRAME_POP_INFO; ir->curr_write++; + if (ir->curr_write >= Py_SIZE(entry)) { + return -1; + } + return 0; } typedef struct _Py_UOpsAbstractFrame { @@ -384,6 +399,9 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } _Py_UOpsOptIREntry *root_frame = ir_frame_push_info(ir); + if (root_frame == NULL) { + goto error; + } self = PyObject_NewVar(_Py_UOpsAbstractInterpContext, &_Py_UOpsAbstractInterpContext_Type, @@ -419,6 +437,7 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } self->frame = frame; + assert(frame != NULL); root_frame->my_virtual_localsplus = self->localsplus; // IR and sym setup @@ -942,8 +961,11 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b #endif // Emit the state of the stack first. 
int stack_entries = ctx->frame->stack_pointer - ctx->frame->stack; + assert(stack_entries <= ctx->frame->stack_len); for (int i = 0; i < stack_entries; i++) { - ir_store(ctx->ir, ctx->frame->stack[i], TARGET_NONE); + if (ir_store(ctx->ir, ctx->frame->stack[i], TARGET_NONE) < 0) { + goto error; + } _Py_UOpsSymbolicExpression *new_stack = sym_init_unknown(ctx); if (new_stack == NULL) { goto error; @@ -955,11 +977,6 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b } ctx->frame->stack[i] = new_stack; } - // Write bookkeeping ops, but don't write duplicates. -// if((curr-1)->opcode == _CHECK_VALIDITY && (curr-2)->opcode == _SET_IP) { -// ir_plain_inst(ctx->ir, *(curr-2)); -// ir_plain_inst(ctx->ir, *(curr-1)); -// } return 0; error: @@ -1083,7 +1100,9 @@ uop_abstract_interpret_single_inst( assert(PEEK(1)->inst.opcode == INIT_FAST || PEEK(1)->inst.opcode == LOAD_FAST_CHECK); PEEK(1)->inst.opcode = LOAD_FAST_CHECK; ctx->frame->stack_pointer = stack_pointer; - write_stack_to_ir(ctx, inst, true); + if (write_stack_to_ir(ctx, inst, true) < 0) { + goto error; + } break; case LOAD_FAST: STACK_GROW(1); @@ -1102,7 +1121,9 @@ uop_abstract_interpret_single_inst( assert(PEEK(1)->inst.opcode == INIT_FAST); PEEK(1)->inst.opcode = LOAD_FAST_AND_CLEAR; ctx->frame->stack_pointer = stack_pointer; - write_stack_to_ir(ctx, inst, true); + if (write_stack_to_ir(ctx, inst, true) < 0) { + goto error; + } _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, oparg); if (new_local == NULL) { goto error; @@ -1120,7 +1141,9 @@ uop_abstract_interpret_single_inst( case STORE_FAST_MAYBE_NULL: case STORE_FAST: { _Py_UOpsSymbolicExpression *value = PEEK(1); - ir_store(ctx->ir, value, oparg); + if (ir_store(ctx->ir, value, oparg) < 0) { + goto error; + } _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, oparg); if (new_local == NULL) { goto error; @@ -1131,8 +1154,12 @@ uop_abstract_interpret_single_inst( break; } case COPY: { - write_stack_to_ir(ctx, inst, 
true); - ir_plain_inst(ctx->ir, *inst); + if (write_stack_to_ir(ctx, inst, true) < 0) { + goto error; + } + if (ir_plain_inst(ctx->ir, *inst) < 0) { + goto error; + } _Py_UOpsSymbolicExpression *bottom = PEEK(1 + (oparg - 1)); STACK_GROW(1); _Py_UOpsSymbolicExpression *temp = sym_init_unknown(ctx); @@ -1145,7 +1172,9 @@ uop_abstract_interpret_single_inst( } case POP_TOP: { - ir_store(ctx->ir, PEEK(1), -1); + if (ir_store(ctx->ir, PEEK(1), -1) < 0) { + goto error; + } STACK_SHRINK(1); break; } @@ -1164,10 +1193,15 @@ uop_abstract_interpret_single_inst( int argcount = oparg; _Py_UOpsAbstractFrame *old_frame = ctx->frame; // TOS is the new frame. - write_stack_to_ir(ctx, inst, true); + if (write_stack_to_ir(ctx, inst, true) < 0) { + goto error; + } STACK_SHRINK(1); ctx->frame->stack_pointer = stack_pointer; _Py_UOpsOptIREntry *frame_ir_entry = ir_frame_push_info(ctx->ir); + if (frame_ir_entry == NULL) { + goto error; + } PyFunctionObject *func = extract_func_from_sym(ctx->new_frame_sym); if (func == NULL) { @@ -1186,7 +1220,9 @@ uop_abstract_interpret_single_inst( args--; argcount++; } - ir_plain_inst(ctx->ir, *inst); + if (ir_plain_inst(ctx->ir, *inst) < 0) { + goto error; + } if (ctx_frame_push( ctx, frame_ir_entry, @@ -1207,10 +1243,16 @@ uop_abstract_interpret_single_inst( case _POP_FRAME: { assert(STACK_LEVEL() == 1); - write_stack_to_ir(ctx, inst, true); + if (write_stack_to_ir(ctx, inst, true) < 0) { + goto error; + } _Py_UOpsOptIREntry *frame_ir_entry = ctx->frame->frame_ir_entry; - ir_frame_pop_info(ctx->ir); - ir_plain_inst(ctx->ir, *inst); + if (ir_frame_pop_info(ctx->ir) < 0) { + goto error; + } + if (ir_plain_inst(ctx->ir, *inst) < 0) { + goto error; + } _Py_UOpsSymbolicExpression *retval = PEEK(1); STACK_SHRINK(1); ctx->frame->stack_pointer = stack_pointer; @@ -1231,8 +1273,12 @@ uop_abstract_interpret_single_inst( } case SWAP: { - write_stack_to_ir(ctx, inst, true); - ir_plain_inst(ctx->ir, *inst); + if (write_stack_to_ir(ctx, inst, true) < 0) { + goto 
error; + } + if (ir_plain_inst(ctx->ir, *inst) < 0) { + goto error; + } _Py_UOpsSymbolicExpression *top; _Py_UOpsSymbolicExpression *bottom; @@ -1258,8 +1304,12 @@ uop_abstract_interpret_single_inst( } case _SET_IP: case _CHECK_VALIDITY: - write_stack_to_ir(ctx, inst, true); - ir_plain_inst(ctx->ir, *inst); + if (write_stack_to_ir(ctx, inst, true) < 0) { + goto error; + } + if (ir_plain_inst(ctx->ir, *inst) < 0) { + goto error; + } break; default: DPRINTF(1, "Unknown opcode in abstract interpreter\n"); @@ -1275,6 +1325,7 @@ uop_abstract_interpret_single_inst( } DPRINTF(3, "} \n"); } + assert(ctx->frame != NULL); DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); @@ -1337,12 +1388,16 @@ uop_abstract_interpret( !op_is_passthrough(curr->opcode)) { DPRINTF(3, "Impure %s\n", (curr->opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[curr->opcode]); if (first_impure) { - write_stack_to_ir(ctx, curr, false); + if (write_stack_to_ir(ctx, curr, false) < 0) { + goto error; + } clear_locals_type_info(ctx); } first_impure = false; ctx->curr_region_id++; - ir_plain_inst(ctx->ir, *curr); + if (ir_plain_inst(ctx->ir, *curr) < 0) { + goto error; + } } else { first_impure = true; @@ -1358,8 +1413,12 @@ uop_abstract_interpret( DPRINTF(3, "GUARD\n"); // Emit the state of the stack first. // Since this is a guard, copy over the type info - write_stack_to_ir(ctx, curr, true); - ir_plain_inst(ctx->ir, *curr); + if (write_stack_to_ir(ctx, curr, true) < 0) { + goto error; + } + if (ir_plain_inst(ctx->ir, *curr) < 0) { + goto error; + } } curr++; @@ -1367,7 +1426,9 @@ uop_abstract_interpret( } ctx->terminating = curr; - write_stack_to_ir(ctx, curr, false); + if (write_stack_to_ir(ctx, curr, false) < 0) { + goto error; + } return ctx; @@ -1680,5 +1741,6 @@ _Py_uop_analyze_and_optimize( // PyErr_Clear(); // } PyMem_Free(temp_writebuffer); + remove_unneeded_uops(buffer, buffer_size); return err_occurred ? 
-1 : 0; } \ No newline at end of file From 6b18e30d7cc3c724736f2381baf059759610c640 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 19 Jan 2024 06:37:13 +0800 Subject: [PATCH 014/111] expand the ir --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 603d94557da359..4555388a14ebd3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -394,7 +394,7 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } - ir = _Py_UOpsSSA_IR_New(ir_entries); + ir = _Py_UOpsSSA_IR_New(ir_entries * OVERALLOCATE_FACTOR); if (ir == NULL) { goto error; } From 4b1dff444f3b4f9422a308f3a9f3f57ff0665cc2 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 19 Jan 2024 06:42:10 +0800 Subject: [PATCH 015/111] fix return path --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4555388a14ebd3..8b5b561bbc0fe7 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -166,7 +166,7 @@ ir_store(_Py_UOps_Opt_IR *ir, _Py_UOpsSymbolicExpression *expr, _Py_UOps_IRStore { // Don't store stuff we know will never get compiled. 
if(_PyOpcode_isstackvalue(expr->inst.opcode) && store_fast_idx == TARGET_NONE) { - return; + return 0; } #ifdef Py_DEBUG char *uop_debug = Py_GETENV(DEBUG_ENV); From 6a61b1861fe5bdaac42b2369a1dcc3a73c1e2efb Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 19 Jan 2024 08:31:48 +0800 Subject: [PATCH 016/111] fix some refleaks --- Lib/test/test_capi/test_opt.py | 6 ++++-- Python/optimizer.c | 14 ++++++++++++++ Python/optimizer_analysis.c | 28 ++++++++++++++++------------ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e9012659542c53..b93330ce1437a7 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -576,13 +576,15 @@ def testfunc(loops): opt = _testinternalcapi.get_uop_optimizer() res = None with temporary_optimizer(opt): - res = testfunc(64) + res = testfunc(32) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - self.assertEqual(res, 127) + self.assertEqual(res, 63) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] self.assertEqual(len(binop_count), 3) + self.assertEqual(len(guard_both_int_count), 1) def test_int_impure_region(self): def testfunc(loops): diff --git a/Python/optimizer.c b/Python/optimizer.c index 27c13724e3b6e7..5a90b34f66c0fd 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -224,8 +224,22 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// +static void +clear_strong_refs_in_uops(_PyExecutorObject *self) +{ + Py_ssize_t uop_len = Py_SIZE(self); + _PyUOpInstruction *trace = &self->trace[0]; + for (Py_ssize_t i = 0; i < uop_len; i++) { + if (trace[i].opcode == _LOAD_CONST_INLINE) { + PyObject *c = (PyObject*)trace[i].operand; + Py_CLEAR(c); + } + } +} + static void 
uop_dealloc(_PyExecutorObject *self) { + clear_strong_refs_in_uops(self); _Py_ExecutorClear(self); PyObject_Free(self); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b5b561bbc0fe7..b242cf52d687f7 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -24,7 +24,7 @@ #define OVERALLOCATE_FACTOR 2 #ifdef Py_DEBUG - static const char *DEBUG_ENV = "PY_OPT_DEBUG"; + static const char *DEBUG_ENV = "PYTHON_OPT_DEBUG"; #define DPRINTF(level, ...) \ if (lltrace >= (level)) { printf(__VA_ARGS__); } #else @@ -99,16 +99,6 @@ typedef struct _Py_UOpsSymbolicExpression { struct _Py_UOpsSymbolicExpression *operands[1]; } _Py_UOpsSymbolicExpression; - -static void -sym_dealloc(PyObject *o) -{ - _Py_UOpsSymbolicExpression *self = (_Py_UOpsSymbolicExpression *)o; - // Note: we are not decerfing the symbolic expressions because we only hold - // a borrowed ref to them. The symexprs are kept alive by the global table. - Py_CLEAR(self->const_val); -} - typedef enum _Py_UOps_IRStore_IdKind { TARGET_NONE = -2, TARGET_UNUSED = -1, @@ -185,6 +175,7 @@ ir_store(_Py_UOps_Opt_IR *ir, _Py_UOpsSymbolicExpression *expr, _Py_UOps_IRStore entry->expr = expr; ir->curr_write++; if (ir->curr_write >= Py_SIZE(ir)) { + DPRINTF(1, "ir_store: ran out of space \n"); return -1; } return 0; @@ -209,6 +200,7 @@ ir_plain_inst(_Py_UOps_Opt_IR *ir, _PyUOpInstruction inst) entry->inst = inst; ir->curr_write++; if (ir->curr_write >= Py_SIZE(ir)) { + DPRINTF(1, "ir_plain_inst: ran out of space \n"); return -1; } return 0; @@ -231,6 +223,7 @@ ir_frame_push_info(_Py_UOps_Opt_IR *ir) entry->prev_frame_ir = NULL; ir->curr_write++; if (ir->curr_write >= Py_SIZE(ir)) { + DPRINTF(1, "ir_frame_push_info: ran out of space \n"); return NULL; } return entry; @@ -252,6 +245,7 @@ ir_frame_pop_info(_Py_UOps_Opt_IR *ir) entry->typ = IR_FRAME_POP_INFO; ir->curr_write++; if (ir->curr_write >= Py_SIZE(entry)) { + DPRINTF(1, "ir_frame_pop_info: ran out of space \n"); return -1; 
} return 0; @@ -338,6 +332,10 @@ abstractinterp_dealloc(PyObject *o) _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; Py_XDECREF(self->frame); Py_XDECREF(self->ir); + Py_ssize_t syms = Py_SIZE(o); + for (Py_ssize_t i = 0; i < syms; i++) { + Py_CLEAR(self->localsplus[i]); + } Py_TYPE(self)->tp_free((PyObject *)self); } @@ -756,7 +754,12 @@ sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbo { uint32_t immutables = (1 << NULL_TYPE | 1 << PYLONG_TYPE | 1 << PYFLOAT_TYPE | 1 << PYUNICODE_TYPE | 1 << SELF_OR_NULL); to_sym->sym_type.types = (from_sym->sym_type.types & immutables); - Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val)); + if (immutables) { + Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val)); + } + else { + Py_CLEAR(to_sym->const_val); + } } static void @@ -1728,6 +1731,7 @@ _Py_uop_analyze_and_optimize( memcpy(buffer, temp_writebuffer, buffer_size * sizeof(_PyUOpInstruction)); PyMem_Free(temp_writebuffer); + Py_DECREF(ctx); remove_unneeded_uops(buffer, buffer_size); From 9e32f75ade00d56a4c642764586d6c89ffe1f993 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 20 Jan 2024 05:37:47 +0800 Subject: [PATCH 017/111] basic value numbering --- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_capi/test_opt.py | 57 +++++++ Python/abstract_interp_cases.c.h | 4 - Python/optimizer_analysis.c | 157 +++++++++++------- .../cases_generator/tier2_abstract_common.py | 1 + 5 files changed, 153 insertions(+), 68 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 9355c3f3ffc9ca..4b4f3119f5885e 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -199,7 +199,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG, [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG, [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | 
HAS_SPECIAL_OPT_FLAG, - [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, + [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG | HAS_SPECIAL_OPT_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE_BORROW] = 0, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b93330ce1437a7..c35b56f46aabfb 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -629,6 +629,26 @@ def testfunc(loops): self.assertIsNotNone(ex) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] self.assertEqual(len(binop_count), 3) + + def test_call_constant_propagate_past_impure(self): + def testfunc(n): + for i in range(n): + x = 1 + y = 1 + x // y + z = x + y + return z + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + def test_int_large_pure_region(self): def testfunc(loops): num = 0 @@ -706,6 +726,27 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertIn("_LOAD_ATTR_CLASS", uops) + def test_call_constant_propagate_in_frame(self): + def testfunc(n): + def dummy(): + x = 1 + y = 1 + return x+y + for i in range(n): + x = dummy() + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + def test_call_constant_propagate_through_frame(self): def testfunc(n): def dummy(x): @@ -725,6 +766,22 @@ def dummy(x): self.assertIn("_PUSH_FRAME", uops) self.assertNotIn("_BINARY_OP_ADD_INT", uops) + def test_int_type_propagate_through_range(self): + def testfunc(n): + + for 
i in range(n): + x = i + i + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 19 * 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_GUARD_BOTH_INT", uops) def test_comprehension(self): def testfunc(n): diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 016e1c956f5a61..609ab60eb9a1b6 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1958,10 +1958,6 @@ break; } - case _SAVE_RETURN_OFFSET: { - break; - } - case _EXIT_TRACE: { break; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b242cf52d687f7..b146079e63237f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -70,15 +70,29 @@ typedef enum { // Can't statically determine if self or null. SELF_OR_NULL = 10, + // Represents something from LOAD_CONST which is truly constant. + TRUE_CONST = 30, INVALID_TYPE = 31, } _Py_UOpsSymExprTypeEnum; +const uint32_t IMMUTABLES = + ( + 1 << NULL_TYPE | + 1 << PYLONG_TYPE | + 1 << PYFLOAT_TYPE | + 1 << PYUNICODE_TYPE | + 1 << SELF_OR_NULL | + 1 << TRUE_CONST + ); + #define MAX_TYPE_WITH_REFINEMENT 2 typedef struct { // bitmask of types uint32_t types; // refinement data for the types uint64_t refinement[MAX_TYPE_WITH_REFINEMENT + 1]; + // constant propagated value (might be NULL) + PyObject *const_val; } _Py_UOpsSymType; @@ -86,9 +100,9 @@ typedef struct _Py_UOpsSymbolicExpression { Py_ssize_t operand_count; Py_ssize_t idx; - // Type of the symbolic expression - _Py_UOpsSymType sym_type; - PyObject *const_val; + // Value numbering but only for types and constant values. + // https://en.wikipedia.org/wiki/Value_numbering + _Py_UOpsSymType *ty_number; // The region where this expression was first created. 
// This matters for anything that isn't immutable int originating_region; @@ -244,7 +258,7 @@ ir_frame_pop_info(_Py_UOps_Opt_IR *ir) _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; entry->typ = IR_FRAME_POP_INFO; ir->curr_write++; - if (ir->curr_write >= Py_SIZE(entry)) { + if (ir->curr_write >= Py_SIZE(ir)) { DPRINTF(1, "ir_frame_pop_info: ran out of space \n"); return -1; } @@ -290,8 +304,6 @@ static PyTypeObject _Py_UOpsAbstractFrame_Type = { }; typedef struct sym_arena { - // Current ID to assign a new (non-duplicate) sym_expr - Py_ssize_t sym_curr_id; char *curr_available; char *end; char *arena; @@ -313,7 +325,13 @@ typedef struct _Py_UOpsAbstractInterpContext { int curr_region_id; _Py_UOps_Opt_IR *ir; + // Arena for the symbolic expression themselves. sym_arena s_arena; + // Arena for the symbolic expressions' types. + // This is separate from the s_arena so that we can free + // all the constants easily. + int ty_curr_number; + _Py_UOpsSymType *ty_arena; // The terminating instruction for the trace. Could be _JUMP_TO_TOP or // _EXIT_TRACE. 
@@ -350,7 +368,7 @@ static PyTypeObject _Py_UOpsAbstractInterpContext_Type = { }; static inline _Py_UOps_Opt_IR * -_Py_UOpsSSA_IR_New(int entries) +ssa_ir_new(int entries) { _Py_UOps_Opt_IR *ir = PyObject_NewVar(_Py_UOps_Opt_IR, &_Py_UOps_Opt_IR_Type, @@ -386,13 +404,20 @@ abstractinterp_context_new(PyCodeObject *co, _Py_UOpsAbstractInterpContext *self = NULL; _Py_UOps_Opt_IR *ir = NULL; char *arena = NULL; - Py_ssize_t arena_size = sizeof(_Py_UOpsSymbolicExpression) * ir_entries * OVERALLOCATE_FACTOR; + _Py_UOpsSymType *ty_arena = NULL; + Py_ssize_t arena_size = (sizeof(_Py_UOpsSymbolicExpression)) * ir_entries * OVERALLOCATE_FACTOR; + Py_ssize_t ty_arena_size = (sizeof(_Py_UOpsSymType)) * ir_entries * OVERALLOCATE_FACTOR; arena = (char *)PyMem_Malloc(arena_size); if (arena == NULL) { goto error; } - ir = _Py_UOpsSSA_IR_New(ir_entries * OVERALLOCATE_FACTOR); + ty_arena = (_Py_UOpsSymType *)PyMem_Malloc(ty_arena_size); + if (ty_arena == NULL) { + goto error; + } + + ir = ssa_ir_new(ir_entries * OVERALLOCATE_FACTOR); if (ir == NULL) { goto error; } @@ -415,11 +440,13 @@ abstractinterp_context_new(PyCodeObject *co, } self->curr_region_id = 0; + // Setup the arena for sym expressions. - self->s_arena.sym_curr_id = 0; self->s_arena.arena = arena; self->s_arena.curr_available = arena; self->s_arena.end = arena + arena_size; + self->ty_curr_number = 0; + self->ty_arena = ty_arena; // Frame setup self->new_frame_sym = NULL; @@ -447,6 +474,7 @@ abstractinterp_context_new(PyCodeObject *co, error: PyMem_Free(arena); + PyMem_Free(ty_arena); if (self != NULL) { self->s_arena.arena = NULL; } @@ -680,6 +708,9 @@ ctx_frame_pop( return 0; } +static void +sym_set_type_from_const(_Py_UOpsSymbolicExpression *sym, PyObject *obj); + // Steals a reference to const_val // Creates a symbolic expression consisting of subexpressoins // from arr_start and va_list. 
@@ -695,19 +726,29 @@ _Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, { int total_subexprs = num_arr + num_subexprs; + _Py_UOpsSymbolicExpression *self = (_Py_UOpsSymbolicExpression *)ctx->s_arena.curr_available; ctx->s_arena.curr_available += sizeof(_Py_UOpsSymbolicExpression) + sizeof(_Py_UOpsSymbolicExpression *) * total_subexprs; if (ctx->s_arena.curr_available >= ctx->s_arena.end) { return NULL; } + _Py_UOpsSymType *ty = &ctx->ty_arena[ctx->ty_curr_number]; + ctx->ty_curr_number++; + ty->const_val = NULL; + ty->types = 0; + + self->ty_number = ty; self->idx = -1; - self->sym_type.types = 1 << INVALID_TYPE; + self->ty_number->types = 0; self->inst = inst; - self->const_val = const_val; + if (const_val != NULL) { + sym_set_type_from_const(self, const_val); + } self->originating_region = ctx->curr_region_id; + // Setup int i = 0; _Py_UOpsSymbolicExpression **operands = self->operands; @@ -736,36 +777,34 @@ _Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, static void sym_set_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) { - sym->sym_type.types |= 1 << typ; + sym->ty_number->types |= 1 << typ; if (typ <= MAX_TYPE_WITH_REFINEMENT) { - sym->sym_type.refinement[typ] = refinement; + sym->ty_number->refinement[typ] = refinement; } } static void -sym_copy_type(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym) +sym_copy_type_number(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym) { - to_sym->sym_type = from_sym->sym_type; - Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val)); + to_sym->ty_number = from_sym->ty_number; } +// Note: for this, to_sym MUST point to brand new sym. 
static void sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym) { - uint32_t immutables = (1 << NULL_TYPE | 1 << PYLONG_TYPE | 1 << PYFLOAT_TYPE | 1 << PYUNICODE_TYPE | 1 << SELF_OR_NULL); - to_sym->sym_type.types = (from_sym->sym_type.types & immutables); - if (immutables) { - Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val)); - } - else { - Py_CLEAR(to_sym->const_val); + to_sym->ty_number->types = (from_sym->ty_number->types & IMMUTABLES); + if (to_sym->ty_number->types) { + to_sym->ty_number->const_val = Py_XNewRef(from_sym->ty_number->const_val); } } +// Steals a reference to obj static void sym_set_type_from_const(_Py_UOpsSymbolicExpression *sym, PyObject *obj) { PyTypeObject *tp = Py_TYPE(obj); + sym->ty_number->const_val = obj; if (tp == &PyLong_Type) { sym_set_type(sym, PYLONG_TYPE, 0); @@ -797,8 +836,6 @@ sym_set_type_from_const(_Py_UOpsSymbolicExpression *sym, PyObject *obj) sym_set_type(sym, GUARD_DORV_VALUES_TYPE, 0); } } - - sym_set_type(sym, GUARD_TYPE_VERSION_TYPE, tp->tp_version_tag); } @@ -831,6 +868,7 @@ static inline _Py_UOpsSymbolicExpression* sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx) { _PyUOpInstruction inst = {LOAD_CONST, const_idx, 0, 0}; + assert(const_val != NULL); _Py_UOpsSymbolicExpression *temp = _Py_UOpsSymbolicExpression_New( ctx, inst, @@ -842,26 +880,10 @@ sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int cons if (temp == NULL) { return NULL; } - sym_set_type_from_const(temp, const_val); + sym_set_type(temp, TRUE_CONST, 0); return temp; } -static _Py_UOpsSymbolicExpression* -sym_init_null(_Py_UOpsAbstractInterpContext *ctx) -{ - if (ctx->frequent_syms.nulL_sym != NULL) { - return ctx->frequent_syms.nulL_sym; - } - _Py_UOpsSymbolicExpression *null_sym = sym_init_unknown(ctx); - if (null_sym == NULL) { - return NULL; - } - sym_set_type(null_sym, NULL_TYPE, 0); - ctx->frequent_syms.nulL_sym = null_sym; - 
- return null_sym; -} - static _Py_UOpsSymbolicExpression* sym_init_push_null(_Py_UOpsAbstractInterpContext *ctx) { @@ -881,7 +903,7 @@ sym_init_push_null(_Py_UOpsAbstractInterpContext *ctx) static inline bool sym_is_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ) { - if ((sym->sym_type.types & (1 << typ)) == 0) { + if ((sym->ty_number->types & (1 << typ)) == 0) { return false; } return true; @@ -894,7 +916,7 @@ sym_matches_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, u return false; } if (typ <= MAX_TYPE_WITH_REFINEMENT) { - return sym->sym_type.refinement[typ] == refinement; + return sym->ty_number->refinement[typ] == refinement; } return true; } @@ -904,7 +926,7 @@ sym_type_get_refinement(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum { assert(sym_is_type(sym, typ)); assert(typ <= MAX_TYPE_WITH_REFINEMENT); - return sym->sym_type.refinement[typ]; + return sym->ty_number->refinement[typ]; } @@ -929,7 +951,8 @@ op_is_pure(uint32_t opcode) static inline bool op_is_bookkeeping(uint32_t opcode) { return (opcode == _SET_IP || - opcode == _CHECK_VALIDITY); + opcode == _CHECK_VALIDITY || + opcode == _SAVE_RETURN_OFFSET); } static inline bool @@ -941,13 +964,13 @@ op_is_specially_handled(uint32_t opcode) static inline bool is_const(_Py_UOpsSymbolicExpression *expr) { - return expr->const_val != NULL; + return expr->ty_number->const_val != NULL; } static inline PyObject * get_const(_Py_UOpsSymbolicExpression *expr) { - return Py_NewRef(expr->const_val); + return Py_NewRef(expr->ty_number->const_val); } @@ -974,7 +997,7 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b goto error; } if (copy_types) { - sym_copy_type(ctx->frame->stack[i], new_stack); + sym_copy_type_number(ctx->frame->stack[i], new_stack); } else { sym_copy_immutable_type_info(ctx->frame->stack[i], new_stack); } @@ -986,13 +1009,18 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b return 
-1; } -static void +static int clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { int locals_entries = ctx->frame->locals_len; for (int i = 0; i < locals_entries; i++) { - // clears out all types except immutables. - sym_copy_immutable_type_info(ctx->frame->locals[i], ctx->frame->locals[i]); + _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, i); + if (new_local == NULL) { + return -1; + } + sym_copy_immutable_type_info(ctx->frame->locals[i], new_local); + ctx->frame->locals[i] = new_local; } + return 0; } typedef enum { @@ -1139,6 +1167,7 @@ uop_abstract_interpret_single_inst( STACK_GROW(1); PEEK(1) = (_Py_UOpsSymbolicExpression *)GETITEM( ctx, oparg); + assert(PEEK(1)->ty_number->const_val != NULL); break; } case STORE_FAST_MAYBE_NULL: @@ -1151,7 +1180,7 @@ uop_abstract_interpret_single_inst( if (new_local == NULL) { goto error; } - sym_copy_type(value, new_local); + sym_copy_type_number(value, new_local); GETLOCAL(oparg) = new_local; STACK_SHRINK(1); break; @@ -1170,7 +1199,7 @@ uop_abstract_interpret_single_inst( goto error; } PEEK(1) = temp; - sym_copy_type(bottom, temp); + sym_copy_type_number(bottom, temp); break; } @@ -1238,7 +1267,7 @@ uop_abstract_interpret_single_inst( // Cannot determine statically, so we can't propagate types. 
if (!sym_is_type(self_or_null, SELF_OR_NULL)) { for (int i = 0; i < argcount; i++) { - sym_copy_type(args[i], ctx->frame->locals[i]); + sym_copy_type_number(args[i], ctx->frame->locals[i]); } } break; @@ -1271,7 +1300,7 @@ uop_abstract_interpret_single_inst( goto error; } PEEK(1) = new_retval; - sym_copy_type(retval, new_retval); + sym_copy_type_number(retval, new_retval); break; } @@ -1293,13 +1322,13 @@ uop_abstract_interpret_single_inst( if (new_top == NULL) { goto error; } - sym_copy_type(top, new_top); + sym_copy_type_number(top, new_top); _Py_UOpsSymbolicExpression *new_bottom = sym_init_unknown(ctx); if (new_bottom == NULL) { goto error; } - sym_copy_type(bottom, new_bottom); + sym_copy_type_number(bottom, new_bottom); stack_pointer[-2 - (oparg-2)] = new_top; stack_pointer[-1] = new_bottom; @@ -1307,6 +1336,7 @@ uop_abstract_interpret_single_inst( } case _SET_IP: case _CHECK_VALIDITY: + case _SAVE_RETURN_OFFSET: if (write_stack_to_ir(ctx, inst, true) < 0) { goto error; } @@ -1394,7 +1424,9 @@ uop_abstract_interpret( if (write_stack_to_ir(ctx, curr, false) < 0) { goto error; } - clear_locals_type_info(ctx); + if (clear_locals_type_info(ctx) < 0) { + goto error; + } } first_impure = false; ctx->curr_region_id++; @@ -1516,7 +1548,7 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, } // Constant propagated value, load immediate constant - if (sym->const_val != NULL && !_PyOpcode_isstackvalue(sym->inst.opcode)) { + if (sym->ty_number->const_val != NULL && !_PyOpcode_isstackvalue(sym->inst.opcode)) { // Shrink the stack if operands consist of stack values. // We don't need them anymore. This could happen because // the operands first need to be guarded and the guard could not @@ -1533,8 +1565,7 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, inst.opcode = _LOAD_CONST_INLINE; inst.oparg = sym->inst.oparg; - // TODO memory leak. 
- inst.operand = (uint64_t)Py_NewRef(sym->const_val); + inst.operand = (uint64_t)Py_NewRef(sym->ty_number->const_val); return emit_i(emitter, inst); } diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py index b5af7222762225..553a92d4b14b42 100644 --- a/Tools/cases_generator/tier2_abstract_common.py +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -17,4 +17,5 @@ # Bookkeeping "_SET_IP", "_CHECK_VALIDITY", + "_SAVE_RETURN_OFFSET", } From 119548ecc5c5a3aa0871bac2ae670b4a81abd2cd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 20 Jan 2024 05:49:18 +0800 Subject: [PATCH 018/111] force enable uops --- Python/pylifecycle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0d5eec06e9b458..c7666c7b142d95 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1231,6 +1231,7 @@ init_interp_main(PyThreadState *tstate) if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } + enabled = 1; // TEMPORARY: always enable if (enabled) { PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer(); if (opt == NULL) { From 27ce303bcff18427df8cd94834b4bfe5b57d69d1 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 20 Jan 2024 06:18:19 +0800 Subject: [PATCH 019/111] allow more memory --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b146079e63237f..45cba4f18cecd6 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -21,7 +21,7 @@ #define MAX_ABSTRACT_INTERP_SIZE 2048 -#define OVERALLOCATE_FACTOR 2 +#define OVERALLOCATE_FACTOR 3 #ifdef Py_DEBUG static const char *DEBUG_ENV = "PYTHON_OPT_DEBUG"; From 89157629f2b04ea8d0a042b957d8119af840f1be Mon Sep 17 00:00:00 2001 From: Ken Jin 
<28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 20 Jan 2024 12:02:23 +0800 Subject: [PATCH 020/111] fix some refleaks, test value numbering --- Lib/test/test_capi/test_opt.py | 24 ++++++ Python/abstract_interp_cases.c.h | 8 -- Python/optimizer_analysis.c | 86 +++++++++++-------- .../tier2_abstract_generator.py | 21 +++++ 4 files changed, 95 insertions(+), 44 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index c35b56f46aabfb..beeea35d961d6e 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -783,6 +783,30 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertNotIn("_GUARD_BOTH_INT", uops) + def test_int_value_nubmering(self): + def testfunc(n): + + y = 1 + for i in range(n): + x = y + z = x + a = z + b = a + res = x + z + a + b + return res + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_GUARD_BOTH_INT", uops) + guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertEqual(len(guard_count), 1) + def test_comprehension(self): def testfunc(n): for _ in range(n): diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 609ab60eb9a1b6..6e96d2d2e52cf4 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -176,8 +176,6 @@ right = get_const(__right_); STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); @@ -208,8 +206,6 @@ right = get_const(__right_); 
STAT_INC(BINARY_OP, hit); res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); @@ -240,8 +236,6 @@ right = get_const(__right_); STAT_INC(BINARY_OP, hit); res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); @@ -427,8 +421,6 @@ right = get_const(__right_); STAT_INC(BINARY_OP, hit); res = PyUnicode_Concat(left, right); - _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 45cba4f18cecd6..8caf1409c9884d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -309,6 +309,12 @@ typedef struct sym_arena { char *arena; } sym_arena; +typedef struct ty_arena { + int ty_curr_number; + int ty_max_number; + _Py_UOpsSymType *arena; +} ty_arena; + typedef struct frequent_syms { _Py_UOpsSymbolicExpression *nulL_sym; _Py_UOpsSymbolicExpression *push_nulL_sym; @@ -330,8 +336,7 @@ typedef struct _Py_UOpsAbstractInterpContext { // Arena for the symbolic expressions' types. // This is separate from the s_arena so that we can free // all the constants easily. - int ty_curr_number; - _Py_UOpsSymType *ty_arena; + ty_arena t_arena; // The terminating instruction for the trace. Could be _JUMP_TO_TOP or // _EXIT_TRACE. 
@@ -350,10 +355,14 @@ abstractinterp_dealloc(PyObject *o) _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; Py_XDECREF(self->frame); Py_XDECREF(self->ir); - Py_ssize_t syms = Py_SIZE(o); - for (Py_ssize_t i = 0; i < syms; i++) { - Py_CLEAR(self->localsplus[i]); + if (self->s_arena.arena != NULL) { + int tys = self->t_arena.ty_curr_number; + for (int i = 0; i < tys; i++) { + Py_CLEAR(self->t_arena.arena[i].const_val); + } } + PyMem_Free(self->t_arena.arena); + PyMem_Free(self->s_arena.arena); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -404,7 +413,7 @@ abstractinterp_context_new(PyCodeObject *co, _Py_UOpsAbstractInterpContext *self = NULL; _Py_UOps_Opt_IR *ir = NULL; char *arena = NULL; - _Py_UOpsSymType *ty_arena = NULL; + _Py_UOpsSymType *t_arena = NULL; Py_ssize_t arena_size = (sizeof(_Py_UOpsSymbolicExpression)) * ir_entries * OVERALLOCATE_FACTOR; Py_ssize_t ty_arena_size = (sizeof(_Py_UOpsSymType)) * ir_entries * OVERALLOCATE_FACTOR; arena = (char *)PyMem_Malloc(arena_size); @@ -412,8 +421,8 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } - ty_arena = (_Py_UOpsSymType *)PyMem_Malloc(ty_arena_size); - if (ty_arena == NULL) { + t_arena = (_Py_UOpsSymType *)PyMem_Malloc(ty_arena_size); + if (t_arena == NULL) { goto error; } @@ -444,9 +453,11 @@ abstractinterp_context_new(PyCodeObject *co, // Setup the arena for sym expressions. self->s_arena.arena = arena; self->s_arena.curr_available = arena; + assert(arena_size > 0); self->s_arena.end = arena + arena_size; - self->ty_curr_number = 0; - self->ty_arena = ty_arena; + self->t_arena.ty_curr_number = 0; + self->t_arena.arena = t_arena; + self->t_arena.ty_max_number = ir_entries * OVERALLOCATE_FACTOR; // Frame setup self->new_frame_sym = NULL; @@ -474,8 +485,10 @@ abstractinterp_context_new(PyCodeObject *co, error: PyMem_Free(arena); - PyMem_Free(ty_arena); + PyMem_Free(t_arena); if (self != NULL) { + // Important so we don't double free them. 
+ self->t_arena.arena = NULL; self->s_arena.arena = NULL; } self->frame = NULL; @@ -507,7 +520,7 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) return sym_consts; error: - Py_DECREF(sym_consts); + PyMem_Free(sym_consts); return NULL; } @@ -584,7 +597,7 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame = PyObject_New(_Py_UOpsAbstractFrame, &_Py_UOpsAbstractFrame_Type); if (frame == NULL) { - Py_DECREF(sym_consts); + PyMem_Free(sym_consts); return NULL; } @@ -620,13 +633,13 @@ char *uop_debug = Py_GETENV(DEBUG_ENV); case _INIT_CALL_PY_EXACT_ARGS: { _Py_UOpsSymbolicExpression *callable_sym = frame_sym->operands[0]; if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { - DPRINTF(2, "error: _PUSH_FRAME not function type\n"); + DPRINTF(1, "error: _PUSH_FRAME not function type\n"); return NULL; } uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); if (func == NULL) { - DPRINTF(2, "error: _PUSH_FRAME cannot find func version\n"); + DPRINTF(1, "error: _PUSH_FRAME cannot find func version\n"); return NULL; } return func; @@ -724,17 +737,29 @@ _Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsSymbolicExpression **arr_start, int num_subexprs, ...) 
{ +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif int total_subexprs = num_arr + num_subexprs; _Py_UOpsSymbolicExpression *self = (_Py_UOpsSymbolicExpression *)ctx->s_arena.curr_available; ctx->s_arena.curr_available += sizeof(_Py_UOpsSymbolicExpression) + sizeof(_Py_UOpsSymbolicExpression *) * total_subexprs; if (ctx->s_arena.curr_available >= ctx->s_arena.end) { + DPRINTF(1, "out of space for symbolic expression\n"); return NULL; } - _Py_UOpsSymType *ty = &ctx->ty_arena[ctx->ty_curr_number]; - ctx->ty_curr_number++; + _Py_UOpsSymType *ty = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number]; + if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) { + DPRINTF(1, "out of space for symbolic expression type\n"); + return NULL; + } + ctx->t_arena.ty_curr_number++; ty->const_val = NULL; ty->types = 0; @@ -970,7 +995,7 @@ is_const(_Py_UOpsSymbolicExpression *expr) static inline PyObject * get_const(_Py_UOpsSymbolicExpression *expr) { - return Py_NewRef(expr->ty_number->const_val); + return expr->ty_number->const_val; } @@ -1006,6 +1031,7 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b return 0; error: + DPRINTF(1, "write_stack_to_ir error\n"); return -1; } @@ -1032,21 +1058,9 @@ typedef enum { #define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dval, result) \ do { \ - if (Py_REFCNT(left) == 1) { \ - ((PyFloatObject *)left)->ob_fval = (dval); \ - _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);\ - result = (left); \ - } \ - else if (Py_REFCNT(right) == 1) {\ - ((PyFloatObject *)right)->ob_fval = (dval); \ - _Py_DECREF_NO_DEALLOC(left); \ - result = (right); \ - }\ - else { \ + { \ result = PyFloat_FromDouble(dval); \ if ((result) == NULL) goto error; \ - _Py_DECREF_NO_DEALLOC(left); \ - _Py_DECREF_NO_DEALLOC(right); \ } \ } while (0) @@ -1235,6 +1249,7 @@ 
uop_abstract_interpret_single_inst( goto error; } + assert(ctx->new_frame_sym != NULL); PyFunctionObject *func = extract_func_from_sym(ctx->new_frame_sym); if (func == NULL) { goto error; @@ -1243,7 +1258,6 @@ uop_abstract_interpret_single_inst( _Py_UOpsSymbolicExpression *self_or_null = extract_self_or_null_from_sym(ctx->new_frame_sym); assert(self_or_null != NULL); - assert(ctx->new_frame_sym != NULL); _Py_UOpsSymbolicExpression **args = extract_args_from_sym(ctx->new_frame_sym); assert(args != NULL); ctx->new_frame_sym = NULL; @@ -1468,6 +1482,7 @@ uop_abstract_interpret( return ctx; error: + Py_XDECREF(ctx); return NULL; } @@ -1729,6 +1744,7 @@ _Py_uop_analyze_and_optimize( { _PyUOpInstruction *temp_writebuffer = NULL; bool err_occurred = false; + _Py_UOpsAbstractInterpContext *ctx = NULL; temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size * OVERALLOCATE_FACTOR); if (temp_writebuffer == NULL) { @@ -1737,7 +1753,7 @@ _Py_uop_analyze_and_optimize( // Pass: Abstract interpretation and symbolic analysis - _Py_UOpsAbstractInterpContext *ctx = uop_abstract_interpret( + ctx = uop_abstract_interpret( co, buffer, buffer_size, curr_stacklen); @@ -1768,13 +1784,11 @@ _Py_uop_analyze_and_optimize( return 0; error: + Py_XDECREF(ctx); // The only valid error we can raise is MemoryError. // Other times it's not really errors but things like not being able // to fetch a function version because the function got deleted. err_occurred = PyErr_Occurred(); -// if (err_occurred && !PyErr_ExceptionMatches(PyExc_MemoryError)) { -// PyErr_Clear(); -// } PyMem_Free(temp_writebuffer); remove_unneeded_uops(buffer, buffer_size); return err_occurred ? 
-1 : 0; diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 93bfa1b87d166c..dc031f85c10c2c 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -96,9 +96,30 @@ def tier2_replace_deopt( out.emit(") goto error;\n") +def tier2_replace_decref_specialized( + out: CWriter, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, +) -> None: + parens = 1 + next(tkn_iter) # LPAREN + for tkn in tkn_iter: + if tkn.kind == "LPAREN": + parens += 1 + if tkn.kind == "RPAREN": + parens -= 1 + if parens == 0: + break + next(tkn_iter) # SEMICOLON + + TIER2_REPLACEMENT_FUNCTIONS = REPLACEMENT_FUNCTIONS.copy() TIER2_REPLACEMENT_FUNCTIONS["ERROR_IF"] = tier2_replace_error TIER2_REPLACEMENT_FUNCTIONS["DEOPT_IF"] = tier2_replace_deopt +TIER2_REPLACEMENT_FUNCTIONS["_Py_DECREF_SPECIALIZED"] = tier2_replace_decref_specialized def _write_body_abstract_interp_impure_uop( From 0974dad14f8f40e6f92c8140048f63498e6c8144 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 20 Jan 2024 12:40:42 +0800 Subject: [PATCH 021/111] fix all the warnings --- Python/abstract_interp_cases.c.h | 12 ++---- Python/optimizer_analysis.c | 5 +-- .../tier2_abstract_generator.py | 40 +++++++++++-------- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 6e96d2d2e52cf4..68cad564c47d41 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -621,7 +621,7 @@ } case _UNPACK_SEQUENCE_TWO_TUPLE: { - PyObject **__values_; + _Py_UOpsSymbolicExpression **__values_; __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); @@ -632,7 +632,7 @@ } case _UNPACK_SEQUENCE_TUPLE: { - PyObject 
**__values_; + _Py_UOpsSymbolicExpression **__values_; __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); @@ -643,7 +643,7 @@ } case _UNPACK_SEQUENCE_LIST: { - PyObject **__values_; + _Py_UOpsSymbolicExpression **__values_; __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); @@ -949,8 +949,6 @@ } case _CHECK_MANAGED_OBJECT_HAS_VALUES: { - _Py_UOpsSymbolicExpression *__owner_; - __owner_ = stack_pointer[-1]; goto guard_required; break; } @@ -1698,8 +1696,6 @@ } case _CHECK_STACK_SPACE: { - _Py_UOpsSymbolicExpression *__callable_; - __callable_ = stack_pointer[-2 - oparg]; goto guard_required; break; } @@ -1708,7 +1704,7 @@ _Py_UOpsSymbolicExpression **__args_; _Py_UOpsSymbolicExpression *__self_or_null_; _Py_UOpsSymbolicExpression *__callable_; - _PyInterpreterFrame *__new_frame_; + _Py_UOpsSymbolicExpression *__new_frame_; __args_ = &stack_pointer[-oparg]; __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8caf1409c9884d..55e8c042d4e889 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -593,7 +593,6 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, if (sym_consts == NULL) { return NULL; } - int total_len = stack_len + locals_len; _Py_UOpsAbstractFrame *frame = PyObject_New(_Py_UOpsAbstractFrame, &_Py_UOpsAbstractFrame_Type); if (frame == NULL) { @@ -1011,9 +1010,9 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b DPRINTF(3, "write_stack_to_ir\n"); #endif // Emit the state of the stack first. 
- int stack_entries = ctx->frame->stack_pointer - ctx->frame->stack; + Py_ssize_t stack_entries = ctx->frame->stack_pointer - ctx->frame->stack; assert(stack_entries <= ctx->frame->stack_len); - for (int i = 0; i < stack_entries; i++) { + for (Py_ssize_t i = 0; i < stack_entries; i++) { if (ir_store(ctx->ir, ctx->frame->stack[i], TARGET_NONE) < 0) { goto error; } diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index dc031f85c10c2c..4c3177429e6f75 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -55,14 +55,17 @@ def declare_variables( skip_inputs: bool = False, skip_peeks: bool = False, ) -> None: + # Don't declare anything for these guards, they will always be evaluated. + if uop.properties.guard and uop.name in NO_CONST_OR_TYPE_EVALUATE: + return variables = set(UNUSED) if not skip_inputs: for var in reversed(uop.stack.inputs): if skip_peeks and var.peek: continue if var.name not in variables: - type = var.type if var.type else default_type - if var.size > "1" and type == "PyObject **": + type = default_type + if var.size != "1" and var.type == "PyObject **": type = "_Py_UOpsSymbolicExpression **" variables.add(var.name) if var.condition: @@ -74,7 +77,9 @@ def declare_variables( continue if var.name not in variables: variables.add(var.name) - type = var.type if var.type else default_type + type = default_type + if var.size != "1" and var.type == "PyObject **": + type = "_Py_UOpsSymbolicExpression **" if var.condition: out.emit(f"{type}{var.name} = NULL;\n") else: @@ -353,20 +358,21 @@ def _write_body_abstract_interp_guard_uop( def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) -> None: try: out.start_line() - for var in reversed(mangled_uop.stack.inputs): - is_impure = ( - not mangled_uop.properties.pure and not mangled_uop.properties.guard - ) - old_var_name = var.name - # code smell, but basically 
impure ops don't use any of their inputs - if is_impure: - var.name = "unused" - out.emit(stack.pop(var)) - var.name = old_var_name + # These types of guards do not need the stack at all. + if not (mangled_uop.properties.guard and mangled_uop.name in NO_CONST_OR_TYPE_EVALUATE): + for var in reversed(mangled_uop.stack.inputs): + is_impure = ( + not mangled_uop.properties.pure and not mangled_uop.properties.guard + ) + old_var_name = var.name + # code smell, but basically impure ops don't use any of their inputs + if is_impure: + var.name = "unused" + out.emit(stack.pop(var)) + var.name = old_var_name if not mangled_uop.properties.stores_sp: for i, var in enumerate(mangled_uop.stack.outputs): out.emit(stack.push(var)) - # emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) if uop.properties.pure: _write_body_abstract_interp_pure_uop(mangled_uop, uop, out, stack) elif uop.properties.guard: @@ -406,13 +412,15 @@ def generate_tier2_abstract( continue out.emit(f"case {uop.name}: {{\n") mangled_uop = mangle_uop_names(uop) - is_impure = not mangled_uop.properties.pure and not mangled_uop.properties.guard + is_impure = not (mangled_uop.properties.pure or mangled_uop.properties.guard) declare_variables(mangled_uop, out, skip_inputs=is_impure, skip_peeks=is_impure) stack = Stack() write_abstract_uop(mangled_uop, uop, out, stack) out.start_line() if not uop.properties.always_exits: - stack.flush(out) + # Guards strictly only peek + if not uop.properties.guard: + stack.flush(out) out.emit("break;\n") out.start_line() out.emit("}") From c9cd8a8018735dd75ff02ddec4b2ddc77f7e9db2 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 02:04:20 +0800 Subject: [PATCH 022/111] fix test --- Lib/test/test_capi/test_mem.py | 5 +++++ Python/abstract_interp_cases.c.h | 12 ++++++------ Python/optimizer_analysis.c | 2 -- Tools/cases_generator/tier2_abstract_generator.py | 2 +- 4 files changed, 12 insertions(+), 9 
deletions(-) diff --git a/Lib/test/test_capi/test_mem.py b/Lib/test/test_capi/test_mem.py index 04f17a9ec9e72a..3c81fd22e67287 100644 --- a/Lib/test/test_capi/test_mem.py +++ b/Lib/test/test_capi/test_mem.py @@ -118,6 +118,9 @@ def test_pyobject_freed_is_freed(self): def test_set_nomemory(self): code = """if 1: import _testcapi + import _testinternalcapi + old_opt = _testinternalcapi.get_optimizer() + _testinternalcapi.set_optimizer(None) class C(): pass @@ -141,6 +144,8 @@ class C(): pass print('MemoryError', outer_cnt, j) _testcapi.remove_mem_hooks() break + + _testinternalcapi.set_optimizer(old_opt) """ rc, out, err = assert_python_ok('-c', code) lines = out.splitlines() diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 68cad564c47d41..4fffa000c324a7 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -612,7 +612,7 @@ } case _UNPACK_SEQUENCE: { - for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; } @@ -623,7 +623,7 @@ case _UNPACK_SEQUENCE_TWO_TUPLE: { _Py_UOpsSymbolicExpression **__values_; __values_ = &stack_pointer[-1]; - for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; } @@ -634,7 +634,7 @@ case _UNPACK_SEQUENCE_TUPLE: { _Py_UOpsSymbolicExpression **__values_; __values_ = &stack_pointer[-1]; - for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; } @@ -645,7 +645,7 @@ case _UNPACK_SEQUENCE_LIST: { _Py_UOpsSymbolicExpression **__values_; __values_ 
= &stack_pointer[-1]; - for (int case_gen_i = 0; case_gen_i < oparg; case_gen_i++) { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; } @@ -654,11 +654,11 @@ } case _UNPACK_EX: { - for (int case_gen_i = 0; case_gen_i < oparg & 0xFF; case_gen_i++) { + for (int case_gen_i = 0; case_gen_i < (oparg & 0xFF); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; } - for (int case_gen_i = 0; case_gen_i < oparg >> 8; case_gen_i++) { + for (int case_gen_i = 0; case_gen_i < (oparg >> 8); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 55e8c042d4e889..abac91a71baccf 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1236,7 +1236,6 @@ uop_abstract_interpret_single_inst( case _PUSH_FRAME: { int argcount = oparg; - _Py_UOpsAbstractFrame *old_frame = ctx->frame; // TOS is the new frame. if (write_stack_to_ir(ctx, inst, true) < 0) { goto error; @@ -1291,7 +1290,6 @@ uop_abstract_interpret_single_inst( if (write_stack_to_ir(ctx, inst, true) < 0) { goto error; } - _Py_UOpsOptIREntry *frame_ir_entry = ctx->frame->frame_ir_entry; if (ir_frame_pop_info(ctx->ir) < 0) { goto error; } diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 4c3177429e6f75..189aa8f0af2a2f 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -146,7 +146,7 @@ def _write_body_abstract_interp_impure_uop( else: # See UNPACK_SEQUENCE for when we need this. 
out.emit( - f"for (int case_gen_i = 0; case_gen_i < {var.size}; case_gen_i++) {{\n" + f"for (int case_gen_i = 0; case_gen_i < ({var.size}); case_gen_i++) {{\n" ) out.emit(f"*(stack_pointer + case_gen_i) = sym_init_unknown(ctx);\n") out.emit(f"if(*(stack_pointer + case_gen_i) == NULL) goto error;\n") From eb56a907c9cbd2261062dcf355dde44754c5fccd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 02:14:49 +0800 Subject: [PATCH 023/111] fix refleak tests --- Lib/test/test_capi/test_opt.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index beeea35d961d6e..9a654e389771ad 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -28,7 +28,19 @@ def clear_executors(func): func.__code__ = func.__code__.replace() -class TestOptimizerAPI(unittest.TestCase): +class UopsTestCase(unittest.TestCase): + def setUp(self): + """For the sake of refleak tests, we need to disable any + current optimizers we might have completely. + """ + self.old = _testinternalcapi.get_optimizer() + _testinternalcapi.set_optimizer(None) + + def tearDown(self): + _testinternalcapi.set_optimizer(self.old) + + +class TestOptimizerAPI(UopsTestCase): def test_get_counter_optimizer_dealloc(self): # See gh-108727 @@ -121,7 +133,7 @@ def get_first_executor(func): return None -class TestExecutorInvalidation(unittest.TestCase): +class TestExecutorInvalidation(UopsTestCase): def setUp(self): self.old = _testinternalcapi.get_optimizer() @@ -183,7 +195,7 @@ def f(): _testinternalcapi.invalidate_executors(f.__code__) self.assertFalse(exe.is_valid()) -class TestUops(unittest.TestCase): +class TestUops(UopsTestCase): def test_basic_loop(self): def testfunc(x): @@ -542,7 +554,7 @@ def testfunc(n): # too much already. 
self.assertEqual(count, 1) -class TestUopsOptimization(unittest.TestCase): +class TestUopsOptimization(UopsTestCase): def test_int_constant_propagation(self): def testfunc(loops): From c9c7854d643f5446faf79b2cb07739fe7733cf25 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 02:15:28 +0800 Subject: [PATCH 024/111] lint --- Lib/test/test_capi/test_mem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_mem.py b/Lib/test/test_capi/test_mem.py index 3c81fd22e67287..0aad2cc46d5afa 100644 --- a/Lib/test/test_capi/test_mem.py +++ b/Lib/test/test_capi/test_mem.py @@ -144,7 +144,7 @@ class C(): pass print('MemoryError', outer_cnt, j) _testcapi.remove_mem_hooks() break - + _testinternalcapi.set_optimizer(old_opt) """ rc, out, err = assert_python_ok('-c', code) From 3e64d1f3d271a17df666f6919570afa054984f0e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 02:38:03 +0800 Subject: [PATCH 025/111] get rid of all compiler warnings --- Python/abstract_interp_cases.c.h | 8 +------- Python/optimizer_analysis.c | 2 +- Tools/cases_generator/stack.py | 4 ++-- .../tier2_abstract_generator.py | 18 +++++++++++++----- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 4fffa000c324a7..1e1140f8c549c1 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -621,8 +621,6 @@ } case _UNPACK_SEQUENCE_TWO_TUPLE: { - _Py_UOpsSymbolicExpression **__values_; - __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; @@ -632,8 +630,6 @@ } case _UNPACK_SEQUENCE_TUPLE: { - _Py_UOpsSymbolicExpression **__values_; - __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; 
case_gen_i < (oparg); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; @@ -643,8 +639,6 @@ } case _UNPACK_SEQUENCE_LIST: { - _Py_UOpsSymbolicExpression **__values_; - __values_ = &stack_pointer[-1]; for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); if(*(stack_pointer + case_gen_i) == NULL) goto error; @@ -1709,7 +1703,7 @@ __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; __new_frame_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, oparg, __args_, 2 , __callable_, __self_or_null_); - stack_pointer[-2 - oparg] = (PyObject *)__new_frame_; + stack_pointer[-2 - oparg] = (_Py_UOpsSymbolicExpression *)__new_frame_; stack_pointer += -1 - oparg; break; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index abac91a71baccf..efa085492128dd 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -160,7 +160,7 @@ static PyTypeObject _Py_UOps_Opt_IR_Type = { .tp_name = "uops SSA IR", .tp_basicsize = sizeof(_Py_UOps_Opt_IR) - sizeof(_Py_UOpsOptIREntry), .tp_itemsize = sizeof(_Py_UOpsOptIREntry), - .tp_dealloc = PyObject_Del, + .tp_dealloc = (destructor)PyObject_Del, .tp_free = PyObject_Free, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index f2d166cf101e63..28ad9507b8e0bb 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -168,11 +168,11 @@ def push(self, var: StackItem) -> str: self.top_offset.push(var) return "" - def flush(self, out: CWriter) -> None: + def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None: out.start_line() for var in self.variables: if not var.peek: - cast = "(PyObject *)" if var.type else "" + cast = f"({cast_type})" if var.type else "" if var.name not in UNUSED and not var.is_array(): 
if var.condition: out.emit(f"if ({var.condition}) ") diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 189aa8f0af2a2f..4919aac133ee14 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -75,6 +75,8 @@ def declare_variables( for var in uop.stack.outputs: if skip_peeks and var.peek: continue + if var.size != "1": + continue if var.name not in variables: variables.add(var.name) type = default_type @@ -358,12 +360,13 @@ def _write_body_abstract_interp_guard_uop( def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) -> None: try: out.start_line() + is_impure = not mangled_uop.properties.pure and not mangled_uop.properties.guard # These types of guards do not need the stack at all. - if not (mangled_uop.properties.guard and mangled_uop.name in NO_CONST_OR_TYPE_EVALUATE): + if not ( + mangled_uop.properties.guard + and mangled_uop.name in NO_CONST_OR_TYPE_EVALUATE + ): for var in reversed(mangled_uop.stack.inputs): - is_impure = ( - not mangled_uop.properties.pure and not mangled_uop.properties.guard - ) old_var_name = var.name # code smell, but basically impure ops don't use any of their inputs if is_impure: @@ -372,7 +375,12 @@ def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) - var.name = old_var_name if not mangled_uop.properties.stores_sp: for i, var in enumerate(mangled_uop.stack.outputs): + old_var_name = var.name + # Code smell, but impure variadic ops don't use their outputs either. 
+ if is_impure and var.size != "1": + var.name = "unused" out.emit(stack.push(var)) + var.name = old_var_name if uop.properties.pure: _write_body_abstract_interp_pure_uop(mangled_uop, uop, out, stack) elif uop.properties.guard: @@ -420,7 +428,7 @@ def generate_tier2_abstract( if not uop.properties.always_exits: # Guards strictly only peek if not uop.properties.guard: - stack.flush(out) + stack.flush(out, cast_type="_Py_UOpsSymbolicExpression *") out.emit("break;\n") out.start_line() out.emit("}") From df3b938b1b1678a09cc92cebe278736bc4650171 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 02:39:14 +0800 Subject: [PATCH 026/111] fix smelly --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index efa085492128dd..893d4596d1ac28 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -75,7 +75,7 @@ typedef enum { INVALID_TYPE = 31, } _Py_UOpsSymExprTypeEnum; -const uint32_t IMMUTABLES = +static const uint32_t IMMUTABLES = ( 1 << NULL_TYPE | 1 << PYLONG_TYPE | From 81a859bc32c7b93e51575ed2324cbdcfe61957aa Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 02:57:11 +0800 Subject: [PATCH 027/111] add comments, cleanup --- Python/optimizer_analysis.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 893d4596d1ac28..1a236e57c57ece 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -31,8 +31,9 @@ #define DPRINTF(level, ...) #endif +// This represents a value that "terminates" the symbolic. 
static inline bool -_PyOpcode_isterminal(uint32_t opcode) +op_is_terminal(uint32_t opcode) { return (opcode == _LOAD_FAST || opcode == _LOAD_FAST_CHECK || @@ -42,13 +43,14 @@ _PyOpcode_isterminal(uint32_t opcode) opcode == PUSH_NULL); } +// This represents a value that is already on the stack. static inline bool -_PyOpcode_isstackvalue(uint32_t opcode) +op_is_stackvalue(uint32_t opcode) { return (opcode == CACHE); } - +// See the interpreter DSL in ./Tools/cases_generator/interpreter_definition.md for what these correspond to. typedef enum { // Types with refinement info GUARD_KEYS_VERSION_TYPE = 0, @@ -119,6 +121,14 @@ typedef enum _Py_UOps_IRStore_IdKind { TARGET_LOCAL = 0, } _Py_UOps_IRStore_IdKind; +/* + * The IR has the following types: + * IR_PLAIN_INST - a plain CPython bytecode instruction + * IR_SYMBOLIC - assign a target the value of a symbolic expression + * IR_FRAME_PUSH_INFO - _PUSH_FRAME + * IR_FRAME_POP_INFO - _POP_FRAME + * IR_NOP - nop + */ typedef enum _Py_UOps_IRStore_EntryKind { IR_PLAIN_INST = 0, IR_SYMBOLIC = 1, @@ -169,7 +179,7 @@ static int ir_store(_Py_UOps_Opt_IR *ir, _Py_UOpsSymbolicExpression *expr, _Py_UOps_IRStore_IdKind store_fast_idx) { // Don't store stuff we know will never get compiled. - if(_PyOpcode_isstackvalue(expr->inst.opcode) && store_fast_idx == TARGET_NONE) { + if(op_is_stackvalue(expr->inst.opcode) && store_fast_idx == TARGET_NONE) { return 0; } #ifdef Py_DEBUG @@ -1531,7 +1541,7 @@ count_stack_operands(_Py_UOpsSymbolicExpression *sym) { int total = 0; for (Py_ssize_t i = 0; i < sym->operand_count; i++) { - if (_PyOpcode_isstackvalue(sym->operands[i]->inst.opcode)) { + if (op_is_stackvalue(sym->operands[i]->inst.opcode)) { total++; } } @@ -1547,9 +1557,9 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, // Since CPython is a stack machine, just compile in the order // seen in the operands, then the instruction itself. 
- if (_PyOpcode_isterminal(sym->inst.opcode)) { + if (op_is_terminal(sym->inst.opcode)) { // These are for unknown stack entries. - if (_PyOpcode_isstackvalue(sym->inst.opcode)) { + if (op_is_stackvalue(sym->inst.opcode)) { // Leave it be. These are initial values from the start return 0; } @@ -1560,7 +1570,7 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, } // Constant propagated value, load immediate constant - if (sym->ty_number->const_val != NULL && !_PyOpcode_isstackvalue(sym->inst.opcode)) { + if (sym->ty_number->const_val != NULL && !op_is_stackvalue(sym->inst.opcode)) { // Shrink the stack if operands consist of stack values. // We don't need them anymore. This could happen because // the operands first need to be guarded and the guard could not From 7280281695af2e0f6ead7cf82b4bd3aeb6a887b9 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 03:09:59 +0800 Subject: [PATCH 028/111] exclude abstract interp cases from C analyzer --- Tools/c-analyzer/cpython/_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 444063d2148934..dffb0c46984c50 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -83,6 +83,7 @@ def clean_lines(text): Python/frozen_modules/*.h Python/generated_cases.c.h Python/executor_cases.c.h +Python/abstract_interp_cases.c.h # not actually source Python/bytecodes.c From ac6e29fbfb89729d4295ecfbb4355a8e54e5cd4d Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Jan 2024 03:25:24 +0800 Subject: [PATCH 029/111] fix c-globals check --- Python/optimizer_analysis.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 1a236e57c57ece..b527504086c8f2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ 
-24,7 +24,7 @@ #define OVERALLOCATE_FACTOR 3 #ifdef Py_DEBUG - static const char *DEBUG_ENV = "PYTHON_OPT_DEBUG"; + static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; #define DPRINTF(level, ...) \ if (lltrace >= (level)) { printf(__VA_ARGS__); } #else @@ -165,7 +165,7 @@ typedef struct _Py_UOps_Opt_IR { _Py_UOpsOptIREntry entries[1]; } _Py_UOps_Opt_IR; -static PyTypeObject _Py_UOps_Opt_IR_Type = { +PyTypeObject _Py_UOps_Opt_IR_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops SSA IR", .tp_basicsize = sizeof(_Py_UOps_Opt_IR) - sizeof(_Py_UOpsOptIREntry), @@ -303,7 +303,7 @@ abstractframe_dealloc(_Py_UOpsAbstractFrame *self) Py_TYPE(self)->tp_free((PyObject *)self); } -static PyTypeObject _Py_UOpsAbstractFrame_Type = { +PyTypeObject _Py_UOpsAbstractFrame_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract frame", .tp_basicsize = sizeof(_Py_UOpsAbstractFrame) , @@ -376,7 +376,7 @@ abstractinterp_dealloc(PyObject *o) Py_TYPE(self)->tp_free((PyObject *)self); } -static PyTypeObject _Py_UOpsAbstractInterpContext_Type = { +PyTypeObject _Py_UOpsAbstractInterpContext_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's context", .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext) - sizeof(_Py_UOpsSymbolicExpression *), From 9e5ef680163e44dc30edfd266b1a2803470ab30c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 15:00:48 +0800 Subject: [PATCH 030/111] fix refleak --- Lib/test/test_capi/test_opt.py | 20 ++++---------------- Python/bytecodes.c | 3 ++- Python/executor_cases.c.h | 5 +++++ 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 9a654e389771ad..beeea35d961d6e 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -28,19 +28,7 @@ def clear_executors(func): func.__code__ = func.__code__.replace() -class 
UopsTestCase(unittest.TestCase): - def setUp(self): - """For the sake of refleak tests, we need to disable any - current optimizers we might have completely. - """ - self.old = _testinternalcapi.get_optimizer() - _testinternalcapi.set_optimizer(None) - - def tearDown(self): - _testinternalcapi.set_optimizer(self.old) - - -class TestOptimizerAPI(UopsTestCase): +class TestOptimizerAPI(unittest.TestCase): def test_get_counter_optimizer_dealloc(self): # See gh-108727 @@ -133,7 +121,7 @@ def get_first_executor(func): return None -class TestExecutorInvalidation(UopsTestCase): +class TestExecutorInvalidation(unittest.TestCase): def setUp(self): self.old = _testinternalcapi.get_optimizer() @@ -195,7 +183,7 @@ def f(): _testinternalcapi.invalidate_executors(f.__code__) self.assertFalse(exe.is_valid()) -class TestUops(UopsTestCase): +class TestUops(unittest.TestCase): def test_basic_loop(self): def testfunc(x): @@ -554,7 +542,7 @@ def testfunc(n): # too much already. self.assertEqual(count, 1) -class TestUopsOptimization(UopsTestCase): +class TestUopsOptimization(unittest.TestCase): def test_int_constant_propagation(self): def testfunc(loops): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5d5ac47dab4ac5..d95c943ad7813c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4089,7 +4089,8 @@ dummy_func( // Nothing, just a sentinel. 
} - op(_SHRINK_STACK, (unused[oparg] --)) { + op(_SHRINK_STACK, (args[oparg] --)) { + DECREF_INPUTS(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ed245f112771b3..32b40116aa305b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3423,7 +3423,12 @@ } case _SHRINK_STACK: { + PyObject **args; oparg = CURRENT_OPARG(); + args = &stack_pointer[-oparg]; + for (int _i = oparg; --_i >= 0;) { + Py_DECREF(args[_i]); + } stack_pointer += -oparg; break; } From 1f27abb3535435acaa582f575e768a31aca3d256 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 18:12:00 +0800 Subject: [PATCH 031/111] peepholing, make _CHECK_PEP_523 a guard --- Include/internal/pycore_opcode_metadata.h | 14 +- Include/internal/pycore_uop_metadata.h | 48 +++---- Lib/test/test_capi/test_opt.py | 2 + Python/abstract_interp_cases.c.h | 1 + Python/bytecodes.c | 2 +- Python/optimizer_analysis.c | 120 ++++++++++++++---- Tools/cases_generator/analyzer.py | 5 +- Tools/cases_generator/generators_common.py | 4 +- .../opcode_metadata_generator.py | 2 +- .../tier2_abstract_generator.py | 3 + 10 files changed, 139 insertions(+), 62 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1226f75afc8d4f..c0df1ffd4d6f5a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -910,7 +910,7 @@ enum InstructionFormat { #define HAS_ERROR_FLAG (256) #define HAS_ESCAPES_FLAG (512) #define HAS_PURE_FLAG (1024) -#define HAS_PASSTHROUGH_FLAG (2048) +#define HAS_GUARD_FLAG (2048) #define HAS_SPECIAL_OPT_FLAG (4096) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) @@ -923,7 +923,7 @@ enum InstructionFormat { #define OPCODE_HAS_ERROR(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_FLAG)) 
#define OPCODE_HAS_ESCAPES(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ESCAPES_FLAG)) #define OPCODE_HAS_PURE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PURE_FLAG)) -#define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) +#define OPCODE_HAS_GUARD(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_GUARD_FLAG)) #define OPCODE_HAS_SPECIAL_OPT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_SPECIAL_OPT_FLAG)) #define OPARG_FULL 0 @@ -1096,7 +1096,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, - [NOP] = { true, INSTR_FMT_IX, 0 }, + [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1158,10 +1158,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_ATTR] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [POP_BLOCK] = { true, -1, 0 }, - [SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG }, - [SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG }, - [SETUP_WITH] = { true, -1, HAS_ARG_FLAG }, + [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, + [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, }; #endif diff --git a/Include/internal/pycore_uop_metadata.h 
b/Include/internal/pycore_uop_metadata.h index 4b4f3119f5885e..5f50fb0a024d70 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -16,7 +16,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = 0, + [_NOP] = HAS_PURE_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, @@ -32,22 +32,22 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNARY_NOT] = HAS_PURE_FLAG, [_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_TO_BOOL_INT] = HAS_DEOPT_FLAG, [_TO_BOOL_LIST] = HAS_DEOPT_FLAG, [_TO_BOOL_NONE] = HAS_DEOPT_FLAG, [_TO_BOOL_STR] = HAS_DEOPT_FLAG, [_TO_BOOL_ALWAYS_TRUE] = HAS_DEOPT_FLAG, [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, - [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, - [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -112,17 +112,17 @@ const 
uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_SUPER_ATTR_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_SUPER_ATTR_METHOD] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_WITH_HINT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_ATTR_WITH_HINT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_SLOT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_CLASS] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_ATTR_CLASS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_CLASS] = HAS_ARG_FLAG, - [_GUARD_DORV_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_DORV_VALUES] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_STORE_ATTR_INSTANCE_VALUE] = HAS_ESCAPES_FLAG, [_STORE_ATTR_SLOT] = HAS_ESCAPES_FLAG, [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -142,32 +142,32 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GET_ITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_YIELD_FROM_ITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FOR_ITER_TIER_TWO] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_ITER_CHECK_LIST] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_NOT_EXHAUSTED_LIST] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_ITER_CHECK_LIST] = 
HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_NOT_EXHAUSTED_LIST] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_ITER_NEXT_LIST] = 0, - [_ITER_CHECK_TUPLE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_ITER_CHECK_TUPLE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_ITER_NEXT_TUPLE] = 0, - [_ITER_CHECK_RANGE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_ITER_CHECK_RANGE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BEFORE_ASYNC_WITH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BEFORE_WITH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_WITH_EXCEPT_START] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_PUSH_EXC_INFO] = 0, - [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_KEYS_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_KEYS_VERSION] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_METHOD_WITH_VALUES] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_METHOD_NO_DICT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = HAS_ARG_FLAG, [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = HAS_ARG_FLAG, - [_CHECK_ATTR_METHOD_LAZY_DICT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_ATTR_METHOD_LAZY_DICT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_METHOD_LAZY_DICT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, - [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG, - [_CHECK_PEP_523] = HAS_DEOPT_FLAG, - [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, 
- [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_PEP_523] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_PUSH_FRAME] = HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index beeea35d961d6e..05432ec47d8424 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -563,6 +563,8 @@ def testfunc(loops): self.assertEqual(res, 1) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) def test_int_type_propagation(self): def testfunc(loops): diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 1e1140f8c549c1..67231aacfc56c6 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1652,6 +1652,7 @@ } case _CHECK_PEP_523: { + goto guard_required; break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d95c943ad7813c..39b56c40d0cbc1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -133,7 +133,7 @@ dummy_func( switch (opcode) { // BEGIN BYTECODES // - inst(NOP, (--)) { + pure inst(NOP, (--)) { } family(RESUME, 0) = { diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b527504086c8f2..b634eaf87490c6 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -100,14 +100,10 @@ typedef struct { typedef struct _Py_UOpsSymbolicExpression { Py_ssize_t operand_count; - Py_ssize_t idx; // Value numbering but only for types and constant values. 
// https://en.wikipedia.org/wiki/Value_numbering _Py_UOpsSymType *ty_number; - // The region where this expression was first created. - // This matters for anything that isn't immutable - int originating_region; // The following fields are for codegen. _PyUOpInstruction inst; @@ -338,7 +334,6 @@ typedef struct _Py_UOpsAbstractInterpContext { // The current "executing" frame. _Py_UOpsAbstractFrame *frame; - int curr_region_id; _Py_UOps_Opt_IR *ir; // Arena for the symbolic expression themselves. @@ -458,7 +453,6 @@ abstractinterp_context_new(PyCodeObject *co, self->localsplus[i] = NULL; } - self->curr_region_id = 0; // Setup the arena for sym expressions. self->s_arena.arena = arena; @@ -773,13 +767,11 @@ _Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, ty->types = 0; self->ty_number = ty; - self->idx = -1; self->ty_number->types = 0; self->inst = inst; if (const_val != NULL) { sym_set_type_from_const(self, const_val); } - self->originating_region = ctx->curr_region_id; @@ -971,9 +963,9 @@ op_is_end(uint32_t opcode) } static inline bool -op_is_passthrough(uint32_t opcode) +op_is_guard(uint32_t opcode) { -return _PyUop_Flags[opcode] & HAS_PASSTHROUGH_FLAG; + return _PyUop_Flags[opcode] & HAS_GUARD_FLAG; } static inline bool @@ -1008,6 +1000,19 @@ get_const(_Py_UOpsSymbolicExpression *expr) } +static int +write_bookkeeping_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr) +{ + if ((curr-1)->opcode == _CHECK_VALIDITY && ((curr-2)->opcode == _SET_IP)) { + if (ir_plain_inst(ctx->ir, *(curr-2)) < 0) { + return -1; + } + if (ir_plain_inst(ctx->ir, *(curr-1)) < 0) { + return -1; + } + } + return 0; +} static int write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, bool copy_types) { @@ -1037,6 +1042,7 @@ write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, b } ctx->frame->stack[i] = new_stack; } + return 0; error: @@ -1148,36 +1154,57 @@ uop_abstract_interpret_single_inst( switch (opcode) 
{ #include "abstract_interp_cases.c.h" // Note: LOAD_FAST_CHECK is not pure!!! - case LOAD_FAST_CHECK: + case LOAD_FAST_CHECK: { STACK_GROW(1); - PEEK(1) = GETLOCAL(oparg); - assert(PEEK(1)->inst.opcode == INIT_FAST || PEEK(1)->inst.opcode == LOAD_FAST_CHECK); - PEEK(1)->inst.opcode = LOAD_FAST_CHECK; - ctx->frame->stack_pointer = stack_pointer; - if (write_stack_to_ir(ctx, inst, true) < 0) { + if (write_bookkeeping_to_ir(ctx, inst) < 0) { + goto error; + } + if (ir_plain_inst(ctx->ir, *inst) < 0) { + goto error; + } + _Py_UOpsSymbolicExpression * local = GETLOCAL(oparg); + _Py_UOpsSymbolicExpression * new_local = sym_init_unknown(ctx); + if (new_local == NULL) { goto error; } + sym_copy_type_number(local, new_local); + PEEK(1) = new_local; break; - case LOAD_FAST: + } + case LOAD_FAST: { STACK_GROW(1); + _Py_UOpsSymbolicExpression * local = GETLOCAL(oparg); + // Might be NULL - replace with LOAD_FAST_CHECK + if (sym_is_type(local, NULL_TYPE)) { + if (write_bookkeeping_to_ir(ctx, inst) < 0) { + goto error; + } + _PyUOpInstruction temp = *inst; + temp.opcode = LOAD_FAST_CHECK; + if (ir_plain_inst(ctx->ir, temp) < 0) { + goto error; + } + _Py_UOpsSymbolicExpression * new_local = sym_init_unknown(ctx); + if (new_local == NULL) { + goto error; + } + sym_copy_type_number(local, new_local); + PEEK(1) = new_local; + break; + } // Guaranteed by the CPython bytecode compiler to not be uninitialized. 
PEEK(1) = GETLOCAL(oparg); - if (sym_is_type(PEEK(1), NULL_TYPE)) { - PEEK(1)->inst.opcode = LOAD_FAST_CHECK; - } PEEK(1)->inst.target = inst->target; assert(PEEK(1)); break; + } case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); PEEK(1) = GETLOCAL(oparg); assert(PEEK(1)->inst.opcode == INIT_FAST); PEEK(1)->inst.opcode = LOAD_FAST_AND_CLEAR; ctx->frame->stack_pointer = stack_pointer; - if (write_stack_to_ir(ctx, inst, true) < 0) { - goto error; - } _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, oparg); if (new_local == NULL) { goto error; @@ -1439,7 +1466,7 @@ uop_abstract_interpret( if (!op_is_pure(curr->opcode) && !op_is_specially_handled(curr->opcode) && !op_is_bookkeeping(curr->opcode) && - !op_is_passthrough(curr->opcode)) { + !op_is_guard(curr->opcode)) { DPRINTF(3, "Impure %s\n", (curr->opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[curr->opcode]); if (first_impure) { if (write_stack_to_ir(ctx, curr, false) < 0) { @@ -1450,7 +1477,6 @@ uop_abstract_interpret( } } first_impure = false; - ctx->curr_region_id++; if (ir_plain_inst(ctx->ir, *curr) < 0) { goto error; } @@ -1459,6 +1485,7 @@ uop_abstract_interpret( first_impure = true; } + status = uop_abstract_interpret_single_inst( curr, end, ctx ); @@ -1606,7 +1633,6 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, } } - // Finally, emit the operation itself. return emit_i(emitter, sym->inst); } @@ -1740,6 +1766,46 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } +static void +peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) +{ + for (int i = 0; i < buffer_size; i++) { + _PyUOpInstruction *curr = buffer + i; + int oparg = curr->oparg; + switch(curr->opcode) { + case _SHRINK_STACK: { + // If all that precedes a _SHRINK_STACK is a bunch of LOAD_FAST, + // then we can safely eliminate that without side effects. 
+ int load_fast_count = 0; + _PyUOpInstruction *back = curr-1; + while((back->opcode == _SET_IP || + back->opcode == _CHECK_VALIDITY || + back->opcode == LOAD_FAST) && + load_fast_count < oparg) { + load_fast_count += back->opcode == LOAD_FAST; + back--; + } + if (load_fast_count == oparg) { + curr->opcode = NOP; + back = curr-1; + load_fast_count = 0; + while((back->opcode == _SET_IP || + back->opcode == _CHECK_VALIDITY || + back->opcode == LOAD_FAST) && + load_fast_count < oparg) { + back->opcode = NOP; + load_fast_count += back->opcode == LOAD_FAST; + back--; + } + } + break; + } + default: + break; + } + } +} + int _Py_uop_analyze_and_optimize( @@ -1781,6 +1847,8 @@ _Py_uop_analyze_and_optimize( goto error; } + peephole_optimizations(temp_writebuffer, trace_len); + // Fill in our new trace! memcpy(buffer, temp_writebuffer, buffer_size * sizeof(_PyUOpInstruction)); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index aee9874922f9ae..8f0d8209786ab5 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -5,6 +5,9 @@ from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR +SPECIAL_GUARDS = { + "_CHECK_PEP_523", +} @dataclass class Properties: @@ -478,7 +481,7 @@ def compute_properties(op: parser.InstDef) -> Properties: has_free=has_free, pure="pure" in op.annotations, passthrough=passthrough, - guard=passthrough and deopts and infallible, + guard=op.name in SPECIAL_GUARDS or (passthrough and deopts and infallible), specially_handled_in_optimizer=op.name in SPECIALLY_HANDLED_ABSTRACT_INSTR, ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 1557b816044eb0..0e804859668d27 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -211,8 +211,8 @@ def cflags(p: Properties) -> str: flags.append("HAS_ESCAPES_FLAG") if p.pure: flags.append("HAS_PURE_FLAG") - if p.passthrough: - 
flags.append("HAS_PASSTHROUGH_FLAG") + if p.guard: + flags.append("HAS_GUARD_FLAG") if p.specially_handled_in_optimizer: flags.append("HAS_SPECIAL_OPT_FLAG") if flags: diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 30958c845db6f1..b9a278ad269f8b 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -51,7 +51,7 @@ "ERROR", "ESCAPES", "PURE", - "PASSTHROUGH", + "GUARD", "SPECIAL_OPT", ] diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 4919aac133ee14..d2f2025ecc3db1 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -212,6 +212,9 @@ def _write_body_abstract_interp_pure_uop( mangled_uop.stack.inputs ) + if uop.name == "_NOP": + return + assert ( len(uop.stack.outputs) == 1 ), f"Currently we only support 1 stack output for pure ops: {uop}" From c88a8c67edb399016ecbb35769fa018670b490e2 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 18:19:07 +0800 Subject: [PATCH 032/111] fix bug --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b634eaf87490c6..dee5908038c348 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1793,8 +1793,8 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) back->opcode == _CHECK_VALIDITY || back->opcode == LOAD_FAST) && load_fast_count < oparg) { - back->opcode = NOP; load_fast_count += back->opcode == LOAD_FAST; + back->opcode = NOP; back--; } } From 307c66fd93aa45e8564930055303bbd36d23ca2e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 20:09:19 +0800 Subject: [PATCH 033/111] make 
things const? Co-Authored-By: Jules <57632293+JuliaPoo@users.noreply.github.com> --- Python/optimizer_analysis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index dee5908038c348..efa1b1e184c5a5 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -161,7 +161,7 @@ typedef struct _Py_UOps_Opt_IR { _Py_UOpsOptIREntry entries[1]; } _Py_UOps_Opt_IR; -PyTypeObject _Py_UOps_Opt_IR_Type = { +const PyTypeObject _Py_UOps_Opt_IR_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops SSA IR", .tp_basicsize = sizeof(_Py_UOps_Opt_IR) - sizeof(_Py_UOpsOptIREntry), @@ -299,7 +299,7 @@ abstractframe_dealloc(_Py_UOpsAbstractFrame *self) Py_TYPE(self)->tp_free((PyObject *)self); } -PyTypeObject _Py_UOpsAbstractFrame_Type = { +const PyTypeObject _Py_UOpsAbstractFrame_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract frame", .tp_basicsize = sizeof(_Py_UOpsAbstractFrame) , From 19ce5382aec57e52d348382c9fbe49132948dc98 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 20:47:49 +0800 Subject: [PATCH 034/111] allow for more scratch space, but keep traces same --- Python/optimizer.c | 13 ++++++++----- Python/optimizer_analysis.c | 5 +++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 5a90b34f66c0fd..f2c9042b3cabb0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -16,7 +16,10 @@ #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA +// This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 512 +// This the above + additional working space we need. 
+#define UOP_MAX_TRACE_WORKING_LENGTH UOP_MAX_TRACE_LENGTH * 2 #define MAX_EXECUTORS_SIZE 256 @@ -720,7 +723,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) { int count = 0; SET_BIT(used, 0); - for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) { + for (int i = 0; i < UOP_MAX_TRACE_WORKING_LENGTH; i++) { if (!BIT_IS_SET(used, i)) { continue; } @@ -752,7 +755,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) static _PyExecutorObject * make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) { - uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; + uint32_t used[(UOP_MAX_TRACE_WORKING_LENGTH + 31)/32] = { 0 }; int length = compute_used(buffer, used); _PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length); if (executor == NULL) { @@ -760,7 +763,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) } int dest = length - 1; /* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */ - for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { + for (int i = UOP_MAX_TRACE_WORKING_LENGTH-1; i >= 0; i--) { if (!BIT_IS_SET(used, i)) { continue; } @@ -811,7 +814,7 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + _PyUOpInstruction buffer[UOP_MAX_TRACE_WORKING_LENGTH]; int err = translate_bytecode_to_trace(code, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies); if (err <= 0) { // Error or nothing translated @@ -819,7 +822,7 @@ uop_optimize( } OPT_STAT_INC(traces_created); // This clears its errors, so if it fails it just doesn't optimize. 
- err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); + err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_WORKING_LENGTH, curr_stackentries); if (err < 0) { return -1; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index efa1b1e184c5a5..118fc71e44d719 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -114,6 +114,7 @@ typedef struct _Py_UOpsSymbolicExpression { typedef enum _Py_UOps_IRStore_IdKind { TARGET_NONE = -2, TARGET_UNUSED = -1, + // Sentinel -- everything here and above are real STORE_FAST opargs! TARGET_LOCAL = 0, } _Py_UOps_IRStore_IdKind; @@ -161,7 +162,7 @@ typedef struct _Py_UOps_Opt_IR { _Py_UOpsOptIREntry entries[1]; } _Py_UOps_Opt_IR; -const PyTypeObject _Py_UOps_Opt_IR_Type = { +PyTypeObject _Py_UOps_Opt_IR_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops SSA IR", .tp_basicsize = sizeof(_Py_UOps_Opt_IR) - sizeof(_Py_UOpsOptIREntry), @@ -299,7 +300,7 @@ abstractframe_dealloc(_Py_UOpsAbstractFrame *self) Py_TYPE(self)->tp_free((PyObject *)self); } -const PyTypeObject _Py_UOpsAbstractFrame_Type = { +PyTypeObject _Py_UOpsAbstractFrame_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract frame", .tp_basicsize = sizeof(_Py_UOpsAbstractFrame) , From 3353996c4be41db0bccfc937ed74ff6b988e5f79 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 20:58:55 +0800 Subject: [PATCH 035/111] fix upstream changes --- Python/abstract_interp_cases.c.h | 14 +++++++------- Python/bytecodes.c | 4 ---- Python/executor_cases.c.h | 9 --------- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 67231aacfc56c6..a33ebf1810fe3c 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1945,7 +1945,7 @@ break; } - case _LOAD_CONST_INLINE_BORROW: { + case 
_LOAD_CONST_INLINE: { _Py_UOpsSymbolicExpression *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; @@ -1954,12 +1954,7 @@ break; } - case _INTERNAL_INCREMENT_OPT_COUNTER: { - stack_pointer += -1; - break; - } - - case _LOAD_CONST_INLINE: { + case _LOAD_CONST_INLINE_BORROW: { _Py_UOpsSymbolicExpression *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; @@ -1968,6 +1963,11 @@ break; } + case _INTERNAL_INCREMENT_OPT_COUNTER: { + stack_pointer += -1; + break; + } + case INIT_FAST: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d11a99902c0018..fbf6cf30d98efd 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4084,10 +4084,6 @@ dummy_func( exe->count++; } - op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = Py_NewRef(ptr); - } - // Represents a possibly uninitialized value in the abstract interpreter. op(INIT_FAST, (--)) { // Nothing, just a sentinel. diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b2fb68ff6305fc..9ef2e5d1c39d73 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3417,15 +3417,6 @@ break; } - case _LOAD_CONST_INLINE: { - PyObject *value; - PyObject *ptr = (PyObject *)CURRENT_OPERAND(); - value = Py_NewRef(ptr); - stack_pointer[0] = value; - stack_pointer += 1; - break; - } - case INIT_FAST: { // Nothing, just a sentinel. 
break; From 543c827e9ec990e7415858adc7fd0fd4fd6929ed Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 20:59:50 +0800 Subject: [PATCH 036/111] apply same peephole from upstream --- Python/optimizer_analysis.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 15e5b1926c2d5a..e4da7e230760e8 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1613,7 +1613,8 @@ compile_sym_to_uops(_Py_UOpsEmitter *emitter, } } - inst.opcode = _LOAD_CONST_INLINE; + inst.opcode = _Py_IsImmortal(sym->ty_number->const_val) + ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; inst.oparg = sym->inst.oparg; inst.operand = (uint64_t)Py_NewRef(sym->ty_number->const_val); return emit_i(emitter, inst); @@ -1802,6 +1803,9 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) } break; } + case _CHECK_PEP_523: + curr->opcode = NOP; + break; default: break; } From 6e554803516c992c7a73bde456ca95617e8b2a10 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 24 Jan 2024 21:04:34 +0800 Subject: [PATCH 037/111] fix eval frame --- Python/optimizer_analysis.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index e4da7e230760e8..139aa4184b9868 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1804,7 +1804,11 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) break; } case _CHECK_PEP_523: - curr->opcode = NOP; + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == NULL) { + curr->opcode = _NOP; + } break; default: break; From 4e74c5b7036dc43842b154fa26c8e224cbd39e59 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> 
Date: Thu, 25 Jan 2024 20:51:49 +0800 Subject: [PATCH 038/111] use uops as IR --- Include/internal/pycore_uop_metadata.h | 2 +- Python/abstract_interp_cases.c.h | 531 ++++----- Python/optimizer_analysis.c | 1032 +++++------------ .../cases_generator/tier2_abstract_common.py | 1 + .../tier2_abstract_generator.py | 50 +- 5 files changed, 562 insertions(+), 1054 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 469d72e77c747f..207333f0e38c4c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -168,7 +168,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, - [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_PUSH_FRAME] = HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a33ebf1810fe3c..b9a4c5a63ac3e0 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -19,18 +19,18 @@ /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ case _END_SEND: { - _Py_UOpsSymbolicExpression *__value_; - _Py_UOpsSymbolicExpression *__receiver_; + _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymbolicValue *__receiver_; __value_ = stack_pointer[-1]; __receiver_ = stack_pointer[-2]; - __value_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __receiver_, __value_); + __value_ = _Py_UOpsSymbolicValue_New(ctx, NULL); stack_pointer[-2] = __value_; stack_pointer += -1; break; } 
case _UNARY_NEGATIVE: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -38,8 +38,8 @@ } case _UNARY_NOT: { - _Py_UOpsSymbolicExpression *__value_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymbolicValue *__res_; __value_ = stack_pointer[-1]; // Constant evaluation if (is_const(__value_)){ @@ -48,10 +48,12 @@ value = get_const(__value_); assert(PyBool_Check(value)); res = Py_IsFalse(value) ? Py_True : Py_False; - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 1 , __value_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 1; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 1 , __value_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -59,7 +61,7 @@ } case _TO_BOOL: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -67,7 +69,7 @@ } case _TO_BOOL_BOOL: { - _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicValue *__value_; __value_ = stack_pointer[-1]; // Constant evaluation if (is_const(__value_)) { @@ -76,14 +78,13 @@ if (!PyBool_Check(value)) goto error; STAT_INC(TO_BOOL, hit); DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _TO_BOOL_INT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -91,7 +92,7 @@ } case _TO_BOOL_LIST: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); 
if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -99,7 +100,7 @@ } case _TO_BOOL_NONE: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -107,7 +108,7 @@ } case _TO_BOOL_STR: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -115,7 +116,7 @@ } case _TO_BOOL_ALWAYS_TRUE: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -123,7 +124,7 @@ } case _UNARY_INVERT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -131,8 +132,8 @@ } case _GUARD_BOTH_INT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -145,26 +146,25 @@ if (!PyLong_CheckExact(right)) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYLONG_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYLONG_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYLONG_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYLONG_TYPE, (uint32_t)0); - goto guard_required; + 
sym_set_type((_Py_UOpsSymbolicValue *)__left_, PYLONG_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymbolicValue *)__right_, PYLONG_TYPE, (uint32_t)0); } break; } case _BINARY_OP_MULTIPLY_INT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymbolicValue *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -178,10 +178,12 @@ res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 2; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; // Type propagation @@ -192,9 +194,9 @@ } case _BINARY_OP_ADD_INT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymbolicValue *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -208,10 +210,12 @@ res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 2; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 
2 , __left_, __right_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; // Type propagation @@ -222,9 +226,9 @@ } case _BINARY_OP_SUBTRACT_INT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymbolicValue *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -238,10 +242,12 @@ res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 2; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; // Type propagation @@ -252,8 +258,8 @@ } case _GUARD_BOTH_FLOAT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -266,26 +272,25 @@ if (!PyFloat_CheckExact(right)) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYFLOAT_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYFLOAT_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + 
new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYFLOAT_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYFLOAT_TYPE, (uint32_t)0); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__left_, PYFLOAT_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymbolicValue *)__right_, PYFLOAT_TYPE, (uint32_t)0); } break; } case _BINARY_OP_MULTIPLY_FLOAT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymbolicValue *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -300,10 +305,12 @@ ((PyFloatObject *)left)->ob_fval * ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 2; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; // Type propagation @@ -314,9 +321,9 @@ } case _BINARY_OP_ADD_FLOAT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymbolicValue *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -331,10 +338,12 @@ ((PyFloatObject *)left)->ob_fval + ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, 
__right_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 2; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; // Type propagation @@ -345,9 +354,9 @@ } case _BINARY_OP_SUBTRACT_FLOAT: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymbolicValue *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -362,10 +371,12 @@ ((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 2; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; // Type propagation @@ -376,8 +387,8 @@ } case _GUARD_BOTH_UNICODE: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -390,26 +401,25 @@ if (!PyUnicode_CheckExact(right)) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__left_, PYUNICODE_TYPE, (uint32_t)0) && 
sym_matches_type((_Py_UOpsSymbolicExpression *)__right_, PYUNICODE_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYUNICODE_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__left_, PYUNICODE_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicExpression *)__right_, PYUNICODE_TYPE, (uint32_t)0); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__left_, PYUNICODE_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymbolicValue *)__right_, PYUNICODE_TYPE, (uint32_t)0); } break; } case _BINARY_OP_ADD_UNICODE: { - _Py_UOpsSymbolicExpression *__right_; - _Py_UOpsSymbolicExpression *__left_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__right_; + _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymbolicValue *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -423,10 +433,12 @@ res = PyUnicode_Concat(left, right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, (PyObject *)res, 0, NULL, 2 , __left_, __right_); - } + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + shrink_stack.oparg = 2; + if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, 0, NULL, 2 , __left_, __right_); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); } if (__res_ == NULL) goto error; // Type propagation @@ -437,7 +449,7 @@ } case _BINARY_SUBSCR: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -446,7 +458,7 @@ } case _BINARY_SLICE: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue 
*__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-3] = __res_; @@ -460,7 +472,7 @@ } case _BINARY_SUBSCR_LIST_INT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -469,7 +481,7 @@ } case _BINARY_SUBSCR_STR_INT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -478,7 +490,7 @@ } case _BINARY_SUBSCR_TUPLE_INT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -487,7 +499,7 @@ } case _BINARY_SUBSCR_DICT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -528,7 +540,7 @@ } case _CALL_INTRINSIC_1: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -536,7 +548,7 @@ } case _CALL_INTRINSIC_2: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -549,7 +561,7 @@ /* _INSTRUMENTED_RETURN_CONST is not a viable micro-op for tier 2 */ case _GET_AITER: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -557,7 +569,7 @@ } case _GET_ANEXT: { - _Py_UOpsSymbolicExpression *__awaitable_; + _Py_UOpsSymbolicValue *__awaitable_; __awaitable_ = sym_init_unknown(ctx); if(__awaitable_ == NULL) goto error; stack_pointer[0] = __awaitable_; @@ -566,7 +578,7 @@ } case _GET_AWAITABLE: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue 
*__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -585,7 +597,7 @@ } case _LOAD_ASSERTION_ERROR: { - _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[0] = __value_; @@ -594,7 +606,7 @@ } case _LOAD_BUILD_CLASS: { - _Py_UOpsSymbolicExpression *__bc_; + _Py_UOpsSymbolicValue *__bc_; __bc_ = sym_init_unknown(ctx); if(__bc_ == NULL) goto error; stack_pointer[0] = __bc_; @@ -680,7 +692,7 @@ } case _LOAD_LOCALS: { - _Py_UOpsSymbolicExpression *__locals_; + _Py_UOpsSymbolicValue *__locals_; __locals_ = sym_init_unknown(ctx); if(__locals_ == NULL) goto error; stack_pointer[0] = __locals_; @@ -689,7 +701,7 @@ } case _LOAD_FROM_DICT_OR_GLOBALS: { - _Py_UOpsSymbolicExpression *__v_; + _Py_UOpsSymbolicValue *__v_; __v_ = sym_init_unknown(ctx); if(__v_ == NULL) goto error; stack_pointer[-1] = __v_; @@ -697,7 +709,7 @@ } case _LOAD_NAME: { - _Py_UOpsSymbolicExpression *__v_; + _Py_UOpsSymbolicValue *__v_; __v_ = sym_init_unknown(ctx); if(__v_ == NULL) goto error; stack_pointer[0] = __v_; @@ -706,8 +718,8 @@ } case _LOAD_GLOBAL: { - _Py_UOpsSymbolicExpression *__res_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymbolicValue *__null_ = NULL; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -728,8 +740,8 @@ } case _LOAD_GLOBAL_MODULE: { - _Py_UOpsSymbolicExpression *__res_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymbolicValue *__null_ = NULL; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -742,8 +754,8 @@ } case _LOAD_GLOBAL_BUILTINS: { - _Py_UOpsSymbolicExpression *__res_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymbolicValue *__null_ = NULL; __res_ = sym_init_unknown(ctx); 
if(__res_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -768,7 +780,7 @@ } case _LOAD_FROM_DICT_OR_DEREF: { - _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[-1] = __value_; @@ -776,7 +788,7 @@ } case _LOAD_DEREF: { - _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[0] = __value_; @@ -794,7 +806,7 @@ } case _BUILD_STRING: { - _Py_UOpsSymbolicExpression *__str_; + _Py_UOpsSymbolicValue *__str_; __str_ = sym_init_unknown(ctx); if(__str_ == NULL) goto error; stack_pointer[-oparg] = __str_; @@ -803,7 +815,7 @@ } case _BUILD_TUPLE: { - _Py_UOpsSymbolicExpression *__tup_; + _Py_UOpsSymbolicValue *__tup_; __tup_ = sym_init_unknown(ctx); if(__tup_ == NULL) goto error; stack_pointer[-oparg] = __tup_; @@ -812,7 +824,7 @@ } case _BUILD_LIST: { - _Py_UOpsSymbolicExpression *__list_; + _Py_UOpsSymbolicValue *__list_; __list_ = sym_init_unknown(ctx); if(__list_ == NULL) goto error; stack_pointer[-oparg] = __list_; @@ -831,7 +843,7 @@ } case _BUILD_SET: { - _Py_UOpsSymbolicExpression *__set_; + _Py_UOpsSymbolicValue *__set_; __set_ = sym_init_unknown(ctx); if(__set_ == NULL) goto error; stack_pointer[-oparg] = __set_; @@ -840,7 +852,7 @@ } case _BUILD_MAP: { - _Py_UOpsSymbolicExpression *__map_; + _Py_UOpsSymbolicValue *__map_; __map_ = sym_init_unknown(ctx); if(__map_ == NULL) goto error; stack_pointer[-oparg*2] = __map_; @@ -853,7 +865,7 @@ } case _BUILD_CONST_KEY_MAP: { - _Py_UOpsSymbolicExpression *__map_; + _Py_UOpsSymbolicValue *__map_; __map_ = sym_init_unknown(ctx); if(__map_ == NULL) goto error; stack_pointer[-1 - oparg] = __map_; @@ -879,7 +891,7 @@ /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ case _LOAD_SUPER_ATTR_ATTR: { - _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicValue *__attr_; __attr_ = 
sym_init_unknown(ctx); if(__attr_ == NULL) goto error; stack_pointer[-3] = __attr_; @@ -888,8 +900,8 @@ } case _LOAD_SUPER_ATTR_METHOD: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__self_or_null_; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__self_or_null_; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_or_null_ = sym_init_unknown(ctx); @@ -901,8 +913,8 @@ } case _LOAD_ATTR: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__self_or_null_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__self_or_null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_or_null_ = sym_init_unknown(ctx); @@ -915,7 +927,7 @@ } case _GUARD_TYPE_VERSION: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -927,29 +939,27 @@ if (tp->tp_version_tag != type_version) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version); } break; } case _CHECK_MANAGED_OBJECT_HAS_VALUES: { - goto guard_required; break; } case _LOAD_ATTR_INSTANCE_VALUE: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__null_ = NULL; 
__attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -962,7 +972,7 @@ } case _CHECK_ATTR_MODULE: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -975,15 +985,14 @@ if (dict->ma_keys->dk_version != type_version) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _LOAD_ATTR_MODULE: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -996,7 +1005,7 @@ } case _CHECK_ATTR_WITH_HINT: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1009,15 +1018,14 @@ if (dict == NULL) goto error; assert(PyDict_CheckExact((PyObject *)dict)); DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _LOAD_ATTR_WITH_HINT: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1030,8 +1038,8 @@ } case _LOAD_ATTR_SLOT: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1044,7 +1052,7 @@ } case _CHECK_ATTR_CLASS: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version 
= (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -1056,15 +1064,14 @@ if (((PyTypeObject *)owner)->tp_version_tag != type_version) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _LOAD_ATTR_CLASS: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__null_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1081,7 +1088,7 @@ /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */ case _GUARD_DORV_VALUES: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1092,17 +1099,16 @@ if (!_PyDictOrValues_IsValues(dorv)) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0); } break; } @@ -1120,7 +1126,7 @@ } case _COMPARE_OP: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1129,7 +1135,7 @@ } case _COMPARE_OP_FLOAT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1138,7 +1144,7 @@ } case 
_COMPARE_OP_INT: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1147,7 +1153,7 @@ } case _COMPARE_OP_STR: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1156,7 +1162,7 @@ } case _IS_OP: { - _Py_UOpsSymbolicExpression *__b_; + _Py_UOpsSymbolicValue *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-2] = __b_; @@ -1165,7 +1171,7 @@ } case _CONTAINS_OP: { - _Py_UOpsSymbolicExpression *__b_; + _Py_UOpsSymbolicValue *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-2] = __b_; @@ -1174,8 +1180,8 @@ } case _CHECK_EG_MATCH: { - _Py_UOpsSymbolicExpression *__rest_; - _Py_UOpsSymbolicExpression *__match_; + _Py_UOpsSymbolicValue *__rest_; + _Py_UOpsSymbolicValue *__match_; __rest_ = sym_init_unknown(ctx); if(__rest_ == NULL) goto error; __match_ = sym_init_unknown(ctx); @@ -1186,7 +1192,7 @@ } case _CHECK_EXC_MATCH: { - _Py_UOpsSymbolicExpression *__b_; + _Py_UOpsSymbolicValue *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-1] = __b_; @@ -1200,7 +1206,7 @@ /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ case _IS_NONE: { - _Py_UOpsSymbolicExpression *__b_; + _Py_UOpsSymbolicValue *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-1] = __b_; @@ -1208,7 +1214,7 @@ } case _GET_LEN: { - _Py_UOpsSymbolicExpression *__len_o_; + _Py_UOpsSymbolicValue *__len_o_; __len_o_ = sym_init_unknown(ctx); if(__len_o_ == NULL) goto error; stack_pointer[0] = __len_o_; @@ -1217,7 +1223,7 @@ } case _MATCH_CLASS: { - _Py_UOpsSymbolicExpression *__attrs_; + _Py_UOpsSymbolicValue *__attrs_; __attrs_ = sym_init_unknown(ctx); if(__attrs_ == NULL) goto error; stack_pointer[-3] = __attrs_; @@ -1226,7 +1232,7 @@ } case _MATCH_MAPPING: 
{ - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[0] = __res_; @@ -1235,7 +1241,7 @@ } case _MATCH_SEQUENCE: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[0] = __res_; @@ -1244,7 +1250,7 @@ } case _MATCH_KEYS: { - _Py_UOpsSymbolicExpression *__values_or_none_; + _Py_UOpsSymbolicValue *__values_or_none_; __values_or_none_ = sym_init_unknown(ctx); if(__values_or_none_ == NULL) goto error; stack_pointer[0] = __values_or_none_; @@ -1253,7 +1259,7 @@ } case _GET_ITER: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -1261,7 +1267,7 @@ } case _GET_YIELD_FROM_ITER: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -1271,7 +1277,7 @@ /* _FOR_ITER is not a viable micro-op for tier 2 */ case _FOR_ITER_TIER_TWO: { - _Py_UOpsSymbolicExpression *__next_; + _Py_UOpsSymbolicValue *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; stack_pointer[0] = __next_; @@ -1282,7 +1288,7 @@ /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 */ case _ITER_CHECK_LIST: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1291,16 +1297,15 @@ if (Py_TYPE(iter) != &PyListIter_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 */ case _GUARD_NOT_EXHAUSTED_LIST: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if 
(is_const(__iter_)) { @@ -1313,14 +1318,13 @@ if (it->it_index >= PyList_GET_SIZE(seq)) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _ITER_NEXT_LIST: { - _Py_UOpsSymbolicExpression *__next_; + _Py_UOpsSymbolicValue *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; stack_pointer[0] = __next_; @@ -1329,7 +1333,7 @@ } case _ITER_CHECK_TUPLE: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1338,16 +1342,15 @@ if (Py_TYPE(iter) != &PyTupleIter_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 */ case _GUARD_NOT_EXHAUSTED_TUPLE: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1360,14 +1363,13 @@ if (it->it_index >= PyTuple_GET_SIZE(seq)) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _ITER_NEXT_TUPLE: { - _Py_UOpsSymbolicExpression *__next_; + _Py_UOpsSymbolicValue *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; stack_pointer[0] = __next_; @@ -1376,7 +1378,7 @@ } case _ITER_CHECK_RANGE: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1386,16 +1388,15 @@ if (Py_TYPE(r) != &PyRangeIter_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } /* _ITER_JUMP_RANGE is not a viable micro-op for tier 2 */ case _GUARD_NOT_EXHAUSTED_RANGE: { - _Py_UOpsSymbolicExpression *__iter_; + _Py_UOpsSymbolicValue *__iter_; __iter_ = 
stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1406,14 +1407,13 @@ if (r->len <= 0) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _ITER_NEXT_RANGE: { - _Py_UOpsSymbolicExpression *__next_; + _Py_UOpsSymbolicValue *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; sym_set_type(__next_, PYLONG_TYPE, 0); @@ -1425,8 +1425,8 @@ /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ case _BEFORE_ASYNC_WITH: { - _Py_UOpsSymbolicExpression *__exit_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__exit_; + _Py_UOpsSymbolicValue *__res_; __exit_ = sym_init_unknown(ctx); if(__exit_ == NULL) goto error; __res_ = sym_init_unknown(ctx); @@ -1438,8 +1438,8 @@ } case _BEFORE_WITH: { - _Py_UOpsSymbolicExpression *__exit_; - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__exit_; + _Py_UOpsSymbolicValue *__res_; __exit_ = sym_init_unknown(ctx); if(__exit_ == NULL) goto error; __res_ = sym_init_unknown(ctx); @@ -1451,7 +1451,7 @@ } case _WITH_EXCEPT_START: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[0] = __res_; @@ -1460,8 +1460,8 @@ } case _PUSH_EXC_INFO: { - _Py_UOpsSymbolicExpression *__prev_exc_; - _Py_UOpsSymbolicExpression *__new_exc_; + _Py_UOpsSymbolicValue *__prev_exc_; + _Py_UOpsSymbolicValue *__new_exc_; __prev_exc_ = sym_init_unknown(ctx); if(__prev_exc_ == NULL) goto error; __new_exc_ = sym_init_unknown(ctx); @@ -1473,7 +1473,7 @@ } case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1484,23 +1484,22 @@ if (!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)) goto error; DPRINTF(3, "const eliminated guard\n"); - 
break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0); } break; } case _GUARD_KEYS_VERSION: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -1512,24 +1511,23 @@ if (owner_heap_type->ht_cached_keys->dk_version != keys_version) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version); } break; } case _LOAD_ATTR_METHOD_WITH_VALUES: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__self_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__self_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1541,8 +1539,8 
@@ } case _LOAD_ATTR_METHOD_NO_DICT: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__self_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__self_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1554,7 +1552,7 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicValue *__attr_; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; stack_pointer[-1] = __attr_; @@ -1563,7 +1561,7 @@ } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - _Py_UOpsSymbolicExpression *__attr_; + _Py_UOpsSymbolicValue *__attr_; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; stack_pointer[-1] = __attr_; @@ -1572,7 +1570,7 @@ } case _CHECK_ATTR_METHOD_LAZY_DICT: { - _Py_UOpsSymbolicExpression *__owner_; + _Py_UOpsSymbolicValue *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1585,15 +1583,14 @@ if (dict != NULL) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } - goto guard_required; break; } case _LOAD_ATTR_METHOD_LAZY_DICT: { - _Py_UOpsSymbolicExpression *__attr_; - _Py_UOpsSymbolicExpression *__self_ = NULL; + _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymbolicValue *__self_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1609,8 +1606,8 @@ /* _CALL is not a viable micro-op for tier 2 */ case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UOpsSymbolicExpression *__null_; - _Py_UOpsSymbolicExpression *__callable_; + _Py_UOpsSymbolicValue *__null_; + _Py_UOpsSymbolicValue *__callable_; __null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; // Constant evaluation @@ -1623,25 +1620,24 @@ if (Py_TYPE(callable) != &PyMethod_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - 
if (sym_matches_type((_Py_UOpsSymbolicExpression *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicExpression *)__null_, NULL_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__null_, NULL_TYPE, (uint32_t)0)){ DPRINTF(2, "type propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__callable_, PYMETHOD_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicExpression *)__null_, NULL_TYPE, (uint32_t)0); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__callable_, PYMETHOD_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymbolicValue *)__null_, NULL_TYPE, (uint32_t)0); } break; } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UOpsSymbolicExpression *__func_; - _Py_UOpsSymbolicExpression *__self_; + _Py_UOpsSymbolicValue *__func_; + _Py_UOpsSymbolicValue *__self_; __func_ = sym_init_unknown(ctx); if(__func_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1652,13 +1648,12 @@ } case _CHECK_PEP_523: { - goto guard_required; break; } case _CHECK_FUNCTION_EXACT_ARGS: { - _Py_UOpsSymbolicExpression *__self_or_null_; - _Py_UOpsSymbolicExpression *__callable_; + _Py_UOpsSymbolicValue *__self_or_null_; + _Py_UOpsSymbolicValue *__callable_; __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)CURRENT_OPERAND(); @@ -1675,44 +1670,28 @@ if (code->co_argcount != oparg + (self_or_null != NULL)) goto error; DPRINTF(3, "const eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicExpression *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)){ DPRINTF(2, "type 
propagation eliminated guard\n"); - break; + new_inst.opcode = _NOP;break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicExpression *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version); - goto guard_required; + sym_set_type((_Py_UOpsSymbolicValue *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version); } break; } case _CHECK_STACK_SPACE: { - goto guard_required; - break; - } - - case _INIT_CALL_PY_EXACT_ARGS: { - _Py_UOpsSymbolicExpression **__args_; - _Py_UOpsSymbolicExpression *__self_or_null_; - _Py_UOpsSymbolicExpression *__callable_; - _Py_UOpsSymbolicExpression *__new_frame_; - __args_ = &stack_pointer[-oparg]; - __self_or_null_ = stack_pointer[-1 - oparg]; - __callable_ = stack_pointer[-2 - oparg]; - __new_frame_ = _Py_UOpsSymbolicExpression_New(ctx, *inst, NULL, oparg, __args_, 2 , __callable_, __self_or_null_); - stack_pointer[-2 - oparg] = (_Py_UOpsSymbolicExpression *)__new_frame_; - stack_pointer += -1 - oparg; break; } /* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 */ case _CALL_TYPE_1: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1721,7 +1700,7 @@ } case _CALL_STR_1: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1730,7 +1709,7 @@ } case _CALL_TUPLE_1: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1746,7 +1725,7 @@ } case _CALL_BUILTIN_CLASS: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1755,7 +1734,7 @@ } case _CALL_BUILTIN_O: { - _Py_UOpsSymbolicExpression *__res_; + 
_Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1764,7 +1743,7 @@ } case _CALL_BUILTIN_FAST: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1773,7 +1752,7 @@ } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1782,7 +1761,7 @@ } case _CALL_LEN: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1791,7 +1770,7 @@ } case _CALL_ISINSTANCE: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1800,7 +1779,7 @@ } case _CALL_METHOD_DESCRIPTOR_O: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1809,7 +1788,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1818,7 +1797,7 @@ } case _CALL_METHOD_DESCRIPTOR_NOARGS: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1827,7 +1806,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1844,7 +1823,7 @@ /* _CALL_FUNCTION_EX is 
not a viable micro-op for tier 2 */ case _MAKE_FUNCTION: { - _Py_UOpsSymbolicExpression *__func_; + _Py_UOpsSymbolicValue *__func_; __func_ = sym_init_unknown(ctx); if(__func_ == NULL) goto error; stack_pointer[-1] = __func_; @@ -1852,7 +1831,7 @@ } case _SET_FUNCTION_ATTRIBUTE: { - _Py_UOpsSymbolicExpression *__func_; + _Py_UOpsSymbolicValue *__func_; __func_ = sym_init_unknown(ctx); if(__func_ == NULL) goto error; stack_pointer[-2] = __func_; @@ -1861,7 +1840,7 @@ } case _BUILD_SLICE: { - _Py_UOpsSymbolicExpression *__slice_; + _Py_UOpsSymbolicValue *__slice_; __slice_ = sym_init_unknown(ctx); if(__slice_ == NULL) goto error; stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = __slice_; @@ -1870,7 +1849,7 @@ } case _CONVERT_VALUE: { - _Py_UOpsSymbolicExpression *__result_; + _Py_UOpsSymbolicValue *__result_; __result_ = sym_init_unknown(ctx); if(__result_ == NULL) goto error; stack_pointer[-1] = __result_; @@ -1878,7 +1857,7 @@ } case _FORMAT_SIMPLE: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -1886,7 +1865,7 @@ } case _FORMAT_WITH_SPEC: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1895,7 +1874,7 @@ } case _BINARY_OP: { - _Py_UOpsSymbolicExpression *__res_; + _Py_UOpsSymbolicValue *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1946,7 +1925,7 @@ } case _LOAD_CONST_INLINE: { - _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[0] = __value_; @@ -1955,7 +1934,7 @@ } case _LOAD_CONST_INLINE_BORROW: { - _Py_UOpsSymbolicExpression *__value_; + _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[0] = __value_; diff 
--git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 139aa4184b9868..c87d0711e26217 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -98,179 +98,20 @@ typedef struct { } _Py_UOpsSymType; -typedef struct _Py_UOpsSymbolicExpression { - Py_ssize_t operand_count; - +typedef struct _Py_UOpsSymbolicValue { // Value numbering but only for types and constant values. // https://en.wikipedia.org/wiki/Value_numbering _Py_UOpsSymType *ty_number; + // More fields can be added later if we want to support + // more optimizations. +} _Py_UOpsSymbolicValue; - // The following fields are for codegen. - _PyUOpInstruction inst; - - struct _Py_UOpsSymbolicExpression *operands[1]; -} _Py_UOpsSymbolicExpression; - -typedef enum _Py_UOps_IRStore_IdKind { - TARGET_NONE = -2, - TARGET_UNUSED = -1, - // Sentinel -- everything here and above are real STORE_FAST opargs! - TARGET_LOCAL = 0, -} _Py_UOps_IRStore_IdKind; - -/* - * The IR has the following types: - * IR_PLAIN_INST - a plain CPython bytecode instruction - * IR_SYMBOLIC - assign a target the value of a symbolic expression - * IR_FRAME_PUSH_INFO - _PUSH_FRAME - * IR_FRAME_POP_INFO - _POP_FRAME - * IR_NOP - nop - */ -typedef enum _Py_UOps_IRStore_EntryKind { - IR_PLAIN_INST = 0, - IR_SYMBOLIC = 1, - IR_FRAME_PUSH_INFO = 2, - IR_FRAME_POP_INFO = 3, - IR_NOP = 4, -} _Py_UOps_IRStore_EntryKind; - -typedef struct _Py_UOpsOptIREntry { - _Py_UOps_IRStore_EntryKind typ; - union { - // IR_PLAIN_INST - _PyUOpInstruction inst; - // IR_SYMBOLIC - struct { - _Py_UOps_IRStore_IdKind assignment_target; - _Py_UOpsSymbolicExpression *expr; - }; - // IR_FRAME_PUSH_INFO, always precedes a _PUSH_FRAME IR_PLAIN_INST - struct { - // Only used in codegen for bookkeeping. - struct _Py_UOpsOptIREntry *prev_frame_ir; - // Localsplus of this frame. 
- _Py_UOpsSymbolicExpression **my_virtual_localsplus; - }; - // IR_FRAME_POP_INFO, always prior to a _POP_FRAME IR_PLAIN_INST - // no fields, just a sentinel - }; -} _Py_UOpsOptIREntry; - -typedef struct _Py_UOps_Opt_IR { - PyObject_VAR_HEAD - int curr_write; - _Py_UOpsOptIREntry entries[1]; -} _Py_UOps_Opt_IR; - -PyTypeObject _Py_UOps_Opt_IR_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uops SSA IR", - .tp_basicsize = sizeof(_Py_UOps_Opt_IR) - sizeof(_Py_UOpsOptIREntry), - .tp_itemsize = sizeof(_Py_UOpsOptIREntry), - .tp_dealloc = (destructor)PyObject_Del, - .tp_free = PyObject_Free, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION -}; - -static int -ir_store(_Py_UOps_Opt_IR *ir, _Py_UOpsSymbolicExpression *expr, _Py_UOps_IRStore_IdKind store_fast_idx) -{ - // Don't store stuff we know will never get compiled. - if(op_is_stackvalue(expr->inst.opcode) && store_fast_idx == TARGET_NONE) { - return 0; - } -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - DPRINTF(3, "ir_store: #%d, expr: %s oparg: %d, operand: %p\n", store_fast_idx, - (expr->inst.opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[expr->inst.opcode], - expr->inst.oparg, - (void *)expr->inst.operand); -#endif - _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; - entry->typ = IR_SYMBOLIC; - entry->assignment_target = store_fast_idx; - entry->expr = expr; - ir->curr_write++; - if (ir->curr_write >= Py_SIZE(ir)) { - DPRINTF(1, "ir_store: ran out of space \n"); - return -1; - } - return 0; -} - -static int -ir_plain_inst(_Py_UOps_Opt_IR *ir, _PyUOpInstruction inst) -{ -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - DPRINTF(3, "ir_inst: opcode: %s oparg: %d, operand: %p\n", - (inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[inst.opcode], - inst.oparg, - (void *)inst.operand); -#endif - _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; - entry->typ = IR_PLAIN_INST; - entry->inst = inst; - ir->curr_write++; - if (ir->curr_write >= Py_SIZE(ir)) { - DPRINTF(1, "ir_plain_inst: ran out of space \n"); - return -1; - } - return 0; -} - -static _Py_UOpsOptIREntry * -ir_frame_push_info(_Py_UOps_Opt_IR *ir) -{ -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - DPRINTF(3, "ir_frame_push_info\n"); -#endif - _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; - entry->typ = IR_FRAME_PUSH_INFO; - entry->my_virtual_localsplus = NULL; - entry->prev_frame_ir = NULL; - ir->curr_write++; - if (ir->curr_write >= Py_SIZE(ir)) { - DPRINTF(1, "ir_frame_push_info: ran out of space \n"); - return NULL; - } - return entry; -} - - -static int -ir_frame_pop_info(_Py_UOps_Opt_IR *ir) -{ -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and 
all that - } - DPRINTF(3, "ir_frame_pop_info\n"); -#endif - _Py_UOpsOptIREntry *entry = &ir->entries[ir->curr_write]; - entry->typ = IR_FRAME_POP_INFO; - ir->curr_write++; - if (ir->curr_write >= Py_SIZE(ir)) { - DPRINTF(1, "ir_frame_pop_info: ran out of space \n"); - return -1; - } - return 0; -} +typedef struct frame_info { + // Only used in codegen for bookkeeping. + struct frame_info *prev_frame_ir; + // Localsplus of this frame. + _Py_UOpsSymbolicValue **my_virtual_localsplus; +} frame_info; typedef struct _Py_UOpsAbstractFrame { PyObject_HEAD @@ -280,16 +121,16 @@ typedef struct _Py_UOpsAbstractFrame { struct _Py_UOpsAbstractFrame *next; // Symbolic version of co_consts int sym_consts_len; - _Py_UOpsSymbolicExpression **sym_consts; + _Py_UOpsSymbolicValue **sym_consts; // Max stacklen int stack_len; int locals_len; - _Py_UOpsOptIREntry *frame_ir_entry; + frame_info *frame_ir_entry; - _Py_UOpsSymbolicExpression **stack_pointer; - _Py_UOpsSymbolicExpression **stack; - _Py_UOpsSymbolicExpression **locals; + _Py_UOpsSymbolicValue **stack_pointer; + _Py_UOpsSymbolicValue **stack; + _Py_UOpsSymbolicValue **locals; } _Py_UOpsAbstractFrame; static void @@ -310,6 +151,19 @@ PyTypeObject _Py_UOpsAbstractFrame_Type = { .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; +typedef struct creating_new_frame { + _Py_UOpsSymbolicValue *func; + _Py_UOpsSymbolicValue *self_or_null; + _Py_UOpsSymbolicValue **args; +} creating_new_frame; + + +typedef struct frame_info_arena { + int curr_number; + int max_number; + frame_info *arena; +} frame_info_arena; + typedef struct sym_arena { char *curr_available; char *end; @@ -323,19 +177,26 @@ typedef struct ty_arena { } ty_arena; typedef struct frequent_syms { - _Py_UOpsSymbolicExpression *nulL_sym; - _Py_UOpsSymbolicExpression *push_nulL_sym; + _Py_UOpsSymbolicValue *push_nulL_sym; } frequent_syms; +typedef struct uops_emitter { + _PyUOpInstruction *writebuffer; + _PyUOpInstruction *writebuffer_end; + int curr_i; +} 
uops_emitter; + // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD - // Stores the symbolic for the upcoming new frame that is about to be created. - _Py_UOpsSymbolicExpression *new_frame_sym; + // Stores the information for the upcoming new frame that is about to be created. + // Corresponds to _INIT_CALL_PY_EXACT_ARGS. + creating_new_frame new_frame_sym; // The current "executing" frame. _Py_UOpsAbstractFrame *frame; - _Py_UOps_Opt_IR *ir; + // An arena for the frame information. + frame_info_arena frame_info; // Arena for the symbolic expression themselves. sym_arena s_arena; @@ -344,15 +205,13 @@ typedef struct _Py_UOpsAbstractInterpContext { // all the constants easily. ty_arena t_arena; - // The terminating instruction for the trace. Could be _JUMP_TO_TOP or - // _EXIT_TRACE. - _PyUOpInstruction *terminating; - frequent_syms frequent_syms; - _Py_UOpsSymbolicExpression **water_level; - _Py_UOpsSymbolicExpression **limit; - _Py_UOpsSymbolicExpression *localsplus[1]; + uops_emitter emitter; + + _Py_UOpsSymbolicValue **water_level; + _Py_UOpsSymbolicValue **limit; + _Py_UOpsSymbolicValue *localsplus[1]; } _Py_UOpsAbstractInterpContext; static void @@ -360,7 +219,6 @@ abstractinterp_dealloc(PyObject *o) { _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; Py_XDECREF(self->frame); - Py_XDECREF(self->ir); if (self->s_arena.arena != NULL) { int tys = self->t_arena.ty_curr_number; for (int i = 0; i < tys; i++) { @@ -375,31 +233,52 @@ abstractinterp_dealloc(PyObject *o) PyTypeObject _Py_UOpsAbstractInterpContext_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's context", - .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext) - sizeof(_Py_UOpsSymbolicExpression *), - .tp_itemsize = sizeof(_Py_UOpsSymbolicExpression *), + .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext) - sizeof(_Py_UOpsSymbolicValue *), + .tp_itemsize = sizeof(_Py_UOpsSymbolicValue *), 
.tp_dealloc = (destructor)abstractinterp_dealloc, .tp_free = PyObject_Free, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; -static inline _Py_UOps_Opt_IR * -ssa_ir_new(int entries) +// Tags a _PUSH_FRAME with the frame info. +static frame_info * +ir_frame_push_info(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *push_frame) { - _Py_UOps_Opt_IR *ir = PyObject_NewVar(_Py_UOps_Opt_IR, - &_Py_UOps_Opt_IR_Type, - entries); - ir->curr_write = 0; - return ir; +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + DPRINTF(3, "ir_frame_push_info\n"); +#endif + if (ctx->frame_info.curr_number >= ctx->frame_info.max_number) { + DPRINTF(1, "ir_frame_push_info: ran out of space \n"); + return NULL; + } + frame_info *entry = &ctx->frame_info.arena[ctx->frame_info.curr_number]; + entry->my_virtual_localsplus = NULL; + entry->prev_frame_ir = NULL; + // root frame + if (push_frame == NULL) { + assert(ctx->frame_info.curr_number == 0); + ctx->frame_info.curr_number++; + return entry; + } + assert(push_frame->opcode == _PUSH_FRAME); + push_frame->operand = (uintptr_t)entry; + ctx->frame_info.curr_number++; + return entry; } static inline _Py_UOpsAbstractFrame * frame_new(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts, int stack_len, int locals_len, - int curr_stacklen, _Py_UOpsOptIREntry *frame_ir_entry); + int curr_stacklen, frame_info *frame_ir_entry); static inline int frame_push(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, - _Py_UOpsSymbolicExpression **localsplus_start, + _Py_UOpsSymbolicValue **localsplus_start, int locals_len, int curr_stacklen, int total_len); @@ -411,17 +290,25 @@ frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame static _Py_UOpsAbstractInterpContext * abstractinterp_context_new(PyCodeObject *co, int curr_stacklen, - int 
ir_entries) + int ir_entries, + _PyUOpInstruction *new_writebuffer) { int locals_len = co->co_nlocalsplus; int stack_len = co->co_stacksize; _Py_UOpsAbstractFrame *frame = NULL; _Py_UOpsAbstractInterpContext *self = NULL; - _Py_UOps_Opt_IR *ir = NULL; char *arena = NULL; _Py_UOpsSymType *t_arena = NULL; - Py_ssize_t arena_size = (sizeof(_Py_UOpsSymbolicExpression)) * ir_entries * OVERALLOCATE_FACTOR; + frame_info *frame_info_arena = NULL; + Py_ssize_t arena_size = (sizeof(_Py_UOpsSymbolicValue)) * ir_entries * OVERALLOCATE_FACTOR; Py_ssize_t ty_arena_size = (sizeof(_Py_UOpsSymType)) * ir_entries * OVERALLOCATE_FACTOR; + Py_ssize_t frame_info_arena_size = (sizeof(frame_info)) * ir_entries * OVERALLOCATE_FACTOR; + + frame_info_arena = PyMem_Malloc(frame_info_arena_size); + if (frame_info_arena == NULL) { + goto error; + } + arena = (char *)PyMem_Malloc(arena_size); if (arena == NULL) { goto error; @@ -432,14 +319,6 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } - ir = ssa_ir_new(ir_entries * OVERALLOCATE_FACTOR); - if (ir == NULL) { - goto error; - } - _Py_UOpsOptIREntry *root_frame = ir_frame_push_info(ir); - if (root_frame == NULL) { - goto error; - } self = PyObject_NewVar(_Py_UOpsAbstractInterpContext, &_Py_UOpsAbstractInterpContext_Type, @@ -448,6 +327,17 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } + // Setup frame info arena + self->frame_info.curr_number = 0; + self->frame_info.arena = frame_info_arena; + self->frame_info.max_number = ir_entries * OVERALLOCATE_FACTOR; + + + frame_info *root_frame = ir_frame_push_info(self, NULL); + if (root_frame == NULL) { + goto error; + } + self->limit = self->localsplus + MAX_ABSTRACT_INTERP_SIZE; self->water_level = self->localsplus; for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { @@ -465,7 +355,10 @@ abstractinterp_context_new(PyCodeObject *co, self->t_arena.ty_max_number = ir_entries * OVERALLOCATE_FACTOR; // Frame setup - self->new_frame_sym = NULL; + self->new_frame_sym.func = 
NULL; + self->new_frame_sym.args = NULL; + self->new_frame_sym.self_or_null = NULL; + frame = frame_new(self, co->co_consts, stack_len, locals_len, curr_stacklen, root_frame); if (frame == NULL) { goto error; @@ -482,41 +375,44 @@ abstractinterp_context_new(PyCodeObject *co, root_frame->my_virtual_localsplus = self->localsplus; // IR and sym setup - self->ir = ir; - self->frequent_syms.nulL_sym = NULL; self->frequent_syms.push_nulL_sym = NULL; + // Emitter setup + self->emitter.writebuffer = new_writebuffer; + self->emitter.curr_i = 0; + self->emitter.writebuffer_end = new_writebuffer + ir_entries; + return self; error: PyMem_Free(arena); PyMem_Free(t_arena); + PyMem_Free(frame_info_arena); if (self != NULL) { // Important so we don't double free them. self->t_arena.arena = NULL; self->s_arena.arena = NULL; + self->frame_info.arena = NULL; } self->frame = NULL; - self->ir = NULL; Py_XDECREF(self); - Py_XDECREF(ir); Py_XDECREF(frame); return NULL; } -static inline _Py_UOpsSymbolicExpression* +static inline _Py_UOpsSymbolicValue* sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx); -static inline _Py_UOpsSymbolicExpression ** +static inline _Py_UOpsSymbolicValue ** create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) { Py_ssize_t co_const_len = PyTuple_GET_SIZE(co_consts); - _Py_UOpsSymbolicExpression **sym_consts = PyMem_New(_Py_UOpsSymbolicExpression *, co_const_len); + _Py_UOpsSymbolicValue **sym_consts = PyMem_New(_Py_UOpsSymbolicValue *, co_const_len); if (sym_consts == NULL) { return NULL; } for (Py_ssize_t i = 0; i < co_const_len; i++) { - _Py_UOpsSymbolicExpression *res = sym_init_const(ctx, Py_NewRef(PyTuple_GET_ITEM(co_consts, i)), (int)i); + _Py_UOpsSymbolicValue *res = sym_init_const(ctx, Py_NewRef(PyTuple_GET_ITEM(co_consts, i)), (int)i); if (res == NULL) { goto error; } @@ -529,14 +425,12 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) return NULL; } -static inline 
_Py_UOpsSymbolicExpression* -sym_init_var(_Py_UOpsAbstractInterpContext *ctx, int locals_idx); -static inline _Py_UOpsSymbolicExpression* +static inline _Py_UOpsSymbolicValue* sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx); static void -sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym); +sym_copy_immutable_type_info(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicValue *to_sym); /* * The reason why we have a separate frame_push and frame_initialize is to mimic @@ -545,7 +439,7 @@ sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbo static inline int frame_push(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, - _Py_UOpsSymbolicExpression **localsplus_start, + _Py_UOpsSymbolicValue **localsplus_start, int locals_len, int curr_stacklen, int total_len) @@ -566,7 +460,7 @@ frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame { // Initialize with the initial state of all local variables for (int i = 0; i < locals_len; i++) { - _Py_UOpsSymbolicExpression *local = sym_init_var(ctx, i); + _Py_UOpsSymbolicValue *local = sym_init_unknown(ctx); if (local == NULL) { goto error; } @@ -576,7 +470,7 @@ frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame // Initialize the stack as well for (int i = 0; i < curr_stacklen; i++) { - _Py_UOpsSymbolicExpression *stackvar = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue *stackvar = sym_init_unknown(ctx); if (stackvar == NULL) { goto error; } @@ -592,9 +486,9 @@ frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame static inline _Py_UOpsAbstractFrame * frame_new(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts, int stack_len, int locals_len, - int curr_stacklen, _Py_UOpsOptIREntry *frame_ir_entry) + int curr_stacklen, frame_info *frame_ir_entry) { - _Py_UOpsSymbolicExpression **sym_consts = create_sym_consts(ctx, co_consts); + 
_Py_UOpsSymbolicValue **sym_consts = create_sym_consts(ctx, co_consts); if (sym_consts == NULL) { return NULL; } @@ -618,12 +512,12 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, } static inline bool -sym_is_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ); +sym_is_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ); static inline uint64_t -sym_type_get_refinement(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ); +sym_type_get_refinement(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ); static inline PyFunctionObject * -extract_func_from_sym(_Py_UOpsSymbolicExpression *frame_sym) +extract_func_from_sym(creating_new_frame *frame_sym) { #ifdef Py_DEBUG char *uop_debug = Py_GETENV(DEBUG_ENV); @@ -631,57 +525,31 @@ char *uop_debug = Py_GETENV(DEBUG_ENV); if (uop_debug != NULL && *uop_debug >= '0') { lltrace = *uop_debug - '0'; // TODO: Parse an int and all that } - DPRINTF(3, "write_stack_to_ir\n"); + DPRINTF(3, "extract_func_from_sym\n"); #endif - switch(frame_sym->inst.opcode) { - case _INIT_CALL_PY_EXACT_ARGS: { - _Py_UOpsSymbolicExpression *callable_sym = frame_sym->operands[0]; - if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { - DPRINTF(1, "error: _PUSH_FRAME not function type\n"); - return NULL; - } - uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); - PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); - if (func == NULL) { - DPRINTF(1, "error: _PUSH_FRAME cannot find func version\n"); - return NULL; - } - return func; + _Py_UOpsSymbolicValue *callable_sym = frame_sym->func; + assert(callable_sym != NULL); + if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { + DPRINTF(1, "error: _PUSH_FRAME not function type\n"); + return NULL; } - default: - Py_UNREACHABLE(); - } -} - -static inline _Py_UOpsSymbolicExpression* -extract_self_or_null_from_sym(_Py_UOpsSymbolicExpression *frame_sym) -{ - switch(frame_sym->inst.opcode) 
{ - case _INIT_CALL_PY_EXACT_ARGS: - return frame_sym->operands[1]; - default: - Py_UNREACHABLE(); - } + uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); + PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); + if (func == NULL) { + DPRINTF(1, "error: _PUSH_FRAME cannot find func version\n"); + return NULL; + } + return func; } -static inline _Py_UOpsSymbolicExpression** -extract_args_from_sym(_Py_UOpsSymbolicExpression *frame_sym) -{ - switch(frame_sym->inst.opcode) { - case _INIT_CALL_PY_EXACT_ARGS: - return &frame_sym->operands[2]; - default: - Py_UNREACHABLE(); - } -} // 0 on success, anything else is error. static int ctx_frame_push( _Py_UOpsAbstractInterpContext *ctx, - _Py_UOpsOptIREntry *frame_ir_entry, + frame_info *frame_ir_entry, PyCodeObject *co, - _Py_UOpsSymbolicExpression **localsplus_start + _Py_UOpsSymbolicValue **localsplus_start ) { assert(frame_ir_entry != NULL); @@ -726,20 +594,12 @@ ctx_frame_pop( } static void -sym_set_type_from_const(_Py_UOpsSymbolicExpression *sym, PyObject *obj); +sym_set_type_from_const(_Py_UOpsSymbolicValue *sym, PyObject *obj); // Steals a reference to const_val -// Creates a symbolic expression consisting of subexpressoins -// from arr_start and va_list. -// The order is -// , -static _Py_UOpsSymbolicExpression* -_Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, - _PyUOpInstruction inst, - PyObject *const_val, - int num_arr, - _Py_UOpsSymbolicExpression **arr_start, - int num_subexprs, ...) 
+static _Py_UOpsSymbolicValue* +_Py_UOpsSymbolicValue_New(_Py_UOpsAbstractInterpContext *ctx, + PyObject *const_val) { #ifdef Py_DEBUG char *uop_debug = Py_GETENV(DEBUG_ENV); @@ -748,11 +608,9 @@ _Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, lltrace = *uop_debug - '0'; // TODO: Parse an int and all that } #endif - int total_subexprs = num_arr + num_subexprs; - - _Py_UOpsSymbolicExpression *self = (_Py_UOpsSymbolicExpression *)ctx->s_arena.curr_available; - ctx->s_arena.curr_available += sizeof(_Py_UOpsSymbolicExpression) + sizeof(_Py_UOpsSymbolicExpression *) * total_subexprs; + _Py_UOpsSymbolicValue *self = (_Py_UOpsSymbolicValue *)ctx->s_arena.curr_available; + ctx->s_arena.curr_available += sizeof(_Py_UOpsSymbolicValue) + sizeof(_Py_UOpsSymbolicValue *); if (ctx->s_arena.curr_available >= ctx->s_arena.end) { DPRINTF(1, "out of space for symbolic expression\n"); return NULL; @@ -769,40 +627,16 @@ _Py_UOpsSymbolicExpression_New(_Py_UOpsAbstractInterpContext *ctx, self->ty_number = ty; self->ty_number->types = 0; - self->inst = inst; if (const_val != NULL) { sym_set_type_from_const(self, const_val); } - - - // Setup - int i = 0; - _Py_UOpsSymbolicExpression **operands = self->operands; - va_list curr; - - va_start(curr, num_subexprs); - - for (; i < num_subexprs; i++) { - operands[i] = va_arg(curr, _Py_UOpsSymbolicExpression *); - assert(operands[i]); - } - - va_end(curr); - - for (int x = 0; x < num_arr; x++) { - operands[i+x] = arr_start[x]; - assert(operands[i+x]); - } - - self->operand_count = total_subexprs; - return self; } static void -sym_set_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) +sym_set_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) { sym->ty_number->types |= 1 << typ; if (typ <= MAX_TYPE_WITH_REFINEMENT) { @@ -811,14 +645,14 @@ sym_set_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, uint6 } static void 
-sym_copy_type_number(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym) +sym_copy_type_number(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicValue *to_sym) { to_sym->ty_number = from_sym->ty_number; } // Note: for this, to_sym MUST point to brand new sym. static void -sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbolicExpression *to_sym) +sym_copy_immutable_type_info(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicValue *to_sym) { to_sym->ty_number->types = (from_sym->ty_number->types & IMMUTABLES); if (to_sym->ty_number->types) { @@ -828,7 +662,7 @@ sym_copy_immutable_type_info(_Py_UOpsSymbolicExpression *from_sym, _Py_UOpsSymbo // Steals a reference to obj static void -sym_set_type_from_const(_Py_UOpsSymbolicExpression *sym, PyObject *obj) +sym_set_type_from_const(_Py_UOpsSymbolicValue *sym, PyObject *obj) { PyTypeObject *tp = Py_TYPE(obj); sym->ty_number->const_val = obj; @@ -866,43 +700,20 @@ sym_set_type_from_const(_Py_UOpsSymbolicExpression *sym, PyObject *obj) } -static inline _Py_UOpsSymbolicExpression* -sym_init_var(_Py_UOpsAbstractInterpContext *ctx, int locals_idx) -{ - _PyUOpInstruction inst = {INIT_FAST, locals_idx, 0, 0}; - return _Py_UOpsSymbolicExpression_New(ctx, - inst, - NULL, - 0, - NULL, - 0); -} - -static inline _Py_UOpsSymbolicExpression* +static inline _Py_UOpsSymbolicValue* sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx) { - _PyUOpInstruction inst = {CACHE, 0, 0, 0}; - return _Py_UOpsSymbolicExpression_New(ctx, - inst, - NULL, - 0, - NULL, - 0); + return _Py_UOpsSymbolicValue_New(ctx,NULL); } // Steals a reference to const_val -static inline _Py_UOpsSymbolicExpression* +static inline _Py_UOpsSymbolicValue* sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx) { - _PyUOpInstruction inst = {LOAD_CONST, const_idx, 0, 0}; assert(const_val != NULL); - _Py_UOpsSymbolicExpression *temp = _Py_UOpsSymbolicExpression_New( + _Py_UOpsSymbolicValue *temp = 
_Py_UOpsSymbolicValue_New( ctx, - inst, - const_val, - 0, - NULL, - 0 + const_val ); if (temp == NULL) { return NULL; @@ -911,24 +722,23 @@ sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int cons return temp; } -static _Py_UOpsSymbolicExpression* +static _Py_UOpsSymbolicValue* sym_init_push_null(_Py_UOpsAbstractInterpContext *ctx) { if (ctx->frequent_syms.push_nulL_sym != NULL) { return ctx->frequent_syms.push_nulL_sym; } - _Py_UOpsSymbolicExpression *null_sym = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue *null_sym = sym_init_unknown(ctx); if (null_sym == NULL) { return NULL; } - null_sym->inst.opcode = PUSH_NULL; sym_set_type(null_sym, NULL_TYPE, 0); ctx->frequent_syms.push_nulL_sym = null_sym; return null_sym; } static inline bool -sym_is_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ) +sym_is_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ) { if ((sym->ty_number->types & (1 << typ)) == 0) { return false; @@ -937,7 +747,7 @@ sym_is_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ) } static inline bool -sym_matches_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) +sym_matches_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) { if (!sym_is_type(sym, typ)) { return false; @@ -949,7 +759,7 @@ sym_matches_type(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ, u } static uint64_t -sym_type_get_refinement(_Py_UOpsSymbolicExpression *sym, _Py_UOpsSymExprTypeEnum typ) +sym_type_get_refinement(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ) { assert(sym_is_type(sym, typ)); assert(typ <= MAX_TYPE_WITH_REFINEMENT); @@ -979,7 +789,8 @@ static inline bool op_is_bookkeeping(uint32_t opcode) { return (opcode == _SET_IP || opcode == _CHECK_VALIDITY || - opcode == _SAVE_RETURN_OFFSET); + opcode == _SAVE_RETURN_OFFSET || + opcode == _RESUME_CHECK); } static inline bool @@ -989,78 +800,85 @@ 
op_is_specially_handled(uint32_t opcode) } static inline bool -is_const(_Py_UOpsSymbolicExpression *expr) +is_const(_Py_UOpsSymbolicValue *expr) { return expr->ty_number->const_val != NULL; } static inline PyObject * -get_const(_Py_UOpsSymbolicExpression *expr) +get_const(_Py_UOpsSymbolicValue *expr) { return expr->ty_number->const_val; } static int -write_bookkeeping_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr) -{ - if ((curr-1)->opcode == _CHECK_VALIDITY && ((curr-2)->opcode == _SET_IP)) { - if (ir_plain_inst(ctx->ir, *(curr-2)) < 0) { - return -1; - } - if (ir_plain_inst(ctx->ir, *(curr-1)) < 0) { +clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { + int locals_entries = ctx->frame->locals_len; + for (int i = 0; i < locals_entries; i++) { + _Py_UOpsSymbolicValue *new_local = sym_init_unknown(ctx); + if (new_local == NULL) { return -1; } + sym_copy_immutable_type_info(ctx->frame->locals[i], new_local); + ctx->frame->locals[i] = new_local; } return 0; } -static int -write_stack_to_ir(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *curr, bool copy_types) { +static inline int +emit_i(uops_emitter *emitter, + _PyUOpInstruction inst) +{ #ifdef Py_DEBUG char *uop_debug = Py_GETENV(DEBUG_ENV); int lltrace = 0; if (uop_debug != NULL && *uop_debug >= '0') { lltrace = *uop_debug - '0'; // TODO: Parse an int and all that } - DPRINTF(3, "write_stack_to_ir\n"); #endif - // Emit the state of the stack first. 
- Py_ssize_t stack_entries = ctx->frame->stack_pointer - ctx->frame->stack; - assert(stack_entries <= ctx->frame->stack_len); - for (Py_ssize_t i = 0; i < stack_entries; i++) { - if (ir_store(ctx->ir, ctx->frame->stack[i], TARGET_NONE) < 0) { - goto error; - } - _Py_UOpsSymbolicExpression *new_stack = sym_init_unknown(ctx); - if (new_stack == NULL) { - goto error; - } - if (copy_types) { - sym_copy_type_number(ctx->frame->stack[i], new_stack); - } else { - sym_copy_immutable_type_info(ctx->frame->stack[i], new_stack); - } - ctx->frame->stack[i] = new_stack; + if (emitter->curr_i < 0) { + DPRINTF(2, "out of emission space\n"); + return -1; } - + if (emitter->writebuffer + emitter->curr_i >= emitter->writebuffer_end) { + DPRINTF(2, "out of emission space\n"); + return -1; + } + if (inst.opcode == _NOP) { + return 0; + } + DPRINTF(2, "Emitting instruction at [%d] op: %s, oparg: %d, target: %d, operand: %" PRIu64 " \n", + emitter->curr_i, + (inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[inst.opcode], + inst.oparg, + inst.target, + inst.operand); + emitter->writebuffer[emitter->curr_i] = inst; + emitter->curr_i++; return 0; - -error: - DPRINTF(1, "write_stack_to_ir error\n"); - return -1; } -static int -clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { - int locals_entries = ctx->frame->locals_len; - for (int i = 0; i < locals_entries; i++) { - _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, i); - if (new_local == NULL) { - return -1; - } - sym_copy_immutable_type_info(ctx->frame->locals[i], new_local); - ctx->frame->locals[i] = new_local; +static inline int +emit_const(uops_emitter *emitter, + PyObject *const_val, + _PyUOpInstruction shrink_stack) +{ +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + if (emit_i(emitter, shrink_stack) < 0) { + return -1; + } + int load_const_opcode = 
_Py_IsImmortal(const_val) + ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + _PyUOpInstruction load_const = {load_const_opcode, 0, 0, const_val}; + if (emit_i(emitter, load_const) < 0) { + return -1; } return 0; } @@ -1068,7 +886,6 @@ clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { typedef enum { ABSTRACT_INTERP_ERROR, ABSTRACT_INTERP_NORMAL, - ABSTRACT_INTERP_GUARD_REQUIRED, } AbstractInterpExitCodes; @@ -1088,7 +905,7 @@ do { \ #ifndef Py_DEBUG #define GETITEM(ctx, i) (ctx->frame->sym_consts[(i)]) #else -static inline _Py_UOpsSymbolicExpression * +static inline _Py_UOpsSymbolicValue * GETITEM(_Py_UOpsAbstractInterpContext *ctx, Py_ssize_t i) { assert(i < ctx->frame->sym_consts_len); return ctx->frame->sym_consts[i]; @@ -1146,7 +963,9 @@ uop_abstract_interpret_single_inst( uint32_t opcode = inst->opcode; uint64_t operand = inst->operand; - _Py_UOpsSymbolicExpression **stack_pointer = ctx->frame->stack_pointer; + _Py_UOpsSymbolicValue **stack_pointer = ctx->frame->stack_pointer; + _PyUOpInstruction new_inst = *inst; + _PyUOpInstruction shrink_stack = {_SHRINK_STACK, 0, 0, 0}; DPRINTF(3, "Abstract interpreting %s:%d ", @@ -1157,14 +976,8 @@ uop_abstract_interpret_single_inst( // Note: LOAD_FAST_CHECK is not pure!!! 
case LOAD_FAST_CHECK: { STACK_GROW(1); - if (write_bookkeeping_to_ir(ctx, inst) < 0) { - goto error; - } - if (ir_plain_inst(ctx->ir, *inst) < 0) { - goto error; - } - _Py_UOpsSymbolicExpression * local = GETLOCAL(oparg); - _Py_UOpsSymbolicExpression * new_local = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue * local = GETLOCAL(oparg); + _Py_UOpsSymbolicValue * new_local = sym_init_unknown(ctx); if (new_local == NULL) { goto error; } @@ -1174,18 +987,12 @@ uop_abstract_interpret_single_inst( } case LOAD_FAST: { STACK_GROW(1); - _Py_UOpsSymbolicExpression * local = GETLOCAL(oparg); + _Py_UOpsSymbolicValue * local = GETLOCAL(oparg); // Might be NULL - replace with LOAD_FAST_CHECK if (sym_is_type(local, NULL_TYPE)) { - if (write_bookkeeping_to_ir(ctx, inst) < 0) { - goto error; - } _PyUOpInstruction temp = *inst; temp.opcode = LOAD_FAST_CHECK; - if (ir_plain_inst(ctx->ir, temp) < 0) { - goto error; - } - _Py_UOpsSymbolicExpression * new_local = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue * new_local = sym_init_unknown(ctx); if (new_local == NULL) { goto error; } @@ -1195,7 +1002,6 @@ uop_abstract_interpret_single_inst( } // Guaranteed by the CPython bytecode compiler to not be uninitialized. 
PEEK(1) = GETLOCAL(oparg); - PEEK(1)->inst.target = inst->target; assert(PEEK(1)); break; @@ -1203,10 +1009,8 @@ uop_abstract_interpret_single_inst( case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); PEEK(1) = GETLOCAL(oparg); - assert(PEEK(1)->inst.opcode == INIT_FAST); - PEEK(1)->inst.opcode = LOAD_FAST_AND_CLEAR; ctx->frame->stack_pointer = stack_pointer; - _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, oparg); + _Py_UOpsSymbolicValue *new_local = sym_init_unknown(ctx); if (new_local == NULL) { goto error; } @@ -1216,18 +1020,15 @@ uop_abstract_interpret_single_inst( } case LOAD_CONST: { STACK_GROW(1); - PEEK(1) = (_Py_UOpsSymbolicExpression *)GETITEM( + PEEK(1) = (_Py_UOpsSymbolicValue *)GETITEM( ctx, oparg); assert(PEEK(1)->ty_number->const_val != NULL); break; } case STORE_FAST_MAYBE_NULL: case STORE_FAST: { - _Py_UOpsSymbolicExpression *value = PEEK(1); - if (ir_store(ctx->ir, value, oparg) < 0) { - goto error; - } - _Py_UOpsSymbolicExpression *new_local = sym_init_var(ctx, oparg); + _Py_UOpsSymbolicValue *value = PEEK(1); + _Py_UOpsSymbolicValue *new_local = sym_init_unknown(ctx); if (new_local == NULL) { goto error; } @@ -1237,15 +1038,9 @@ uop_abstract_interpret_single_inst( break; } case COPY: { - if (write_stack_to_ir(ctx, inst, true) < 0) { - goto error; - } - if (ir_plain_inst(ctx->ir, *inst) < 0) { - goto error; - } - _Py_UOpsSymbolicExpression *bottom = PEEK(1 + (oparg - 1)); + _Py_UOpsSymbolicValue *bottom = PEEK(1 + (oparg - 1)); STACK_GROW(1); - _Py_UOpsSymbolicExpression *temp = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue *temp = sym_init_unknown(ctx); if (temp == NULL) { goto error; } @@ -1255,16 +1050,13 @@ uop_abstract_interpret_single_inst( } case POP_TOP: { - if (ir_store(ctx->ir, PEEK(1), -1) < 0) { - goto error; - } STACK_SHRINK(1); break; } case PUSH_NULL: { STACK_GROW(1); - _Py_UOpsSymbolicExpression *null_sym = sym_init_push_null(ctx); + _Py_UOpsSymbolicValue *null_sym = sym_init_push_null(ctx); if (null_sym == NULL) { goto 
error; } @@ -1272,39 +1064,53 @@ uop_abstract_interpret_single_inst( break; } + case _INIT_CALL_PY_EXACT_ARGS: { + _Py_UOpsSymbolicValue **__args_; + _Py_UOpsSymbolicValue *__self_or_null_; + _Py_UOpsSymbolicValue *__callable_; + _Py_UOpsSymbolicValue *__new_frame_; + __args_ = &stack_pointer[-oparg]; + __self_or_null_ = stack_pointer[-1 - oparg]; + __callable_ = stack_pointer[-2 - oparg]; + // Store the frame symbolic to extract information later + assert(ctx->new_frame_sym.func == NULL); + ctx->new_frame_sym.func = __callable_; + ctx->new_frame_sym.self_or_null = __self_or_null_; + ctx->new_frame_sym.args = __args_; + __new_frame_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + stack_pointer[-2 - oparg] = (_Py_UOpsSymbolicValue *)__new_frame_; + stack_pointer += -1 - oparg; + break; + } + case _PUSH_FRAME: { int argcount = oparg; // TOS is the new frame. - if (write_stack_to_ir(ctx, inst, true) < 0) { - goto error; - } STACK_SHRINK(1); ctx->frame->stack_pointer = stack_pointer; - _Py_UOpsOptIREntry *frame_ir_entry = ir_frame_push_info(ctx->ir); + frame_info *frame_ir_entry = ir_frame_push_info(ctx, inst); if (frame_ir_entry == NULL) { goto error; } - assert(ctx->new_frame_sym != NULL); - PyFunctionObject *func = extract_func_from_sym(ctx->new_frame_sym); + PyFunctionObject *func = extract_func_from_sym(&ctx->new_frame_sym); if (func == NULL) { goto error; } PyCodeObject *co = (PyCodeObject *)func->func_code; - _Py_UOpsSymbolicExpression *self_or_null = extract_self_or_null_from_sym(ctx->new_frame_sym); + _Py_UOpsSymbolicValue *self_or_null = ctx->new_frame_sym.self_or_null; assert(self_or_null != NULL); - _Py_UOpsSymbolicExpression **args = extract_args_from_sym(ctx->new_frame_sym); + _Py_UOpsSymbolicValue **args = ctx->new_frame_sym.args; assert(args != NULL); - ctx->new_frame_sym = NULL; + ctx->new_frame_sym.func = NULL; + ctx->new_frame_sym.self_or_null = NULL; + ctx->new_frame_sym.args = NULL; // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS if 
(!sym_is_type(self_or_null, NULL_TYPE)) { args--; argcount++; } - if (ir_plain_inst(ctx->ir, *inst) < 0) { - goto error; - } if (ctx_frame_push( ctx, frame_ir_entry, @@ -1325,16 +1131,7 @@ uop_abstract_interpret_single_inst( case _POP_FRAME: { assert(STACK_LEVEL() == 1); - if (write_stack_to_ir(ctx, inst, true) < 0) { - goto error; - } - if (ir_frame_pop_info(ctx->ir) < 0) { - goto error; - } - if (ir_plain_inst(ctx->ir, *inst) < 0) { - goto error; - } - _Py_UOpsSymbolicExpression *retval = PEEK(1); + _Py_UOpsSymbolicValue *retval = PEEK(1); STACK_SHRINK(1); ctx->frame->stack_pointer = stack_pointer; @@ -1344,7 +1141,7 @@ uop_abstract_interpret_single_inst( stack_pointer = ctx->frame->stack_pointer; // Push retval into new frame. STACK_GROW(1); - _Py_UOpsSymbolicExpression *new_retval = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue *new_retval = sym_init_unknown(ctx); if (new_retval == NULL) { goto error; } @@ -1354,26 +1151,19 @@ uop_abstract_interpret_single_inst( } case SWAP: { - if (write_stack_to_ir(ctx, inst, true) < 0) { - goto error; - } - if (ir_plain_inst(ctx->ir, *inst) < 0) { - goto error; - } - - _Py_UOpsSymbolicExpression *top; - _Py_UOpsSymbolicExpression *bottom; + _Py_UOpsSymbolicValue *top; + _Py_UOpsSymbolicValue *bottom; top = stack_pointer[-1]; bottom = stack_pointer[-2 - (oparg-2)]; assert(oparg >= 2); - _Py_UOpsSymbolicExpression *new_top = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue *new_top = sym_init_unknown(ctx); if (new_top == NULL) { goto error; } sym_copy_type_number(top, new_top); - _Py_UOpsSymbolicExpression *new_bottom = sym_init_unknown(ctx); + _Py_UOpsSymbolicValue *new_bottom = sym_init_unknown(ctx); if (new_bottom == NULL) { goto error; } @@ -1386,32 +1176,20 @@ uop_abstract_interpret_single_inst( case _SET_IP: case _CHECK_VALIDITY: case _SAVE_RETURN_OFFSET: - if (write_stack_to_ir(ctx, inst, true) < 0) { - goto error; - } - if (ir_plain_inst(ctx->ir, *inst) < 0) { - goto error; - } break; default: DPRINTF(1, "Unknown opcode 
in abstract interpreter\n"); Py_UNREACHABLE(); } - - // Store the frame symbolic to extract information later - if (opcode == _INIT_CALL_PY_EXACT_ARGS) { - ctx->new_frame_sym = PEEK(1); - DPRINTF(3, "call_py_exact_args: {"); - for (Py_ssize_t i = 0; i < (ctx->new_frame_sym->operand_count); i++) { - DPRINTF(3, "#%ld (%s)", i, ((ctx->new_frame_sym->operands[i]->inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[ctx->new_frame_sym->operands[i]->inst.opcode])) - } - DPRINTF(3, "} \n"); - } assert(ctx->frame != NULL); DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); + if (emit_i(&ctx->emitter, new_inst) < 0) { + return ABSTRACT_INTERP_ERROR; + } + return ABSTRACT_INTERP_NORMAL; pop_2_error_tier_two: @@ -1421,19 +1199,13 @@ uop_abstract_interpret_single_inst( DPRINTF(1, "Encountered error in abstract interpreter\n"); return ABSTRACT_INTERP_ERROR; -guard_required: - DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); - ctx->frame->stack_pointer = stack_pointer; - assert(STACK_LEVEL() >= 0); - - return ABSTRACT_INTERP_GUARD_REQUIRED; - } -static _Py_UOpsAbstractInterpContext * +int uop_abstract_interpret( PyCodeObject *co, _PyUOpInstruction *trace, + _PyUOpInstruction *new_trace, int trace_len, int curr_stacklen ) @@ -1452,7 +1224,7 @@ uop_abstract_interpret( ctx = abstractinterp_context_new( co, curr_stacklen, - trace_len); + trace_len, new_trace); if (ctx == NULL) { goto error; } @@ -1470,17 +1242,11 @@ uop_abstract_interpret( !op_is_guard(curr->opcode)) { DPRINTF(3, "Impure %s\n", (curr->opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[curr->opcode]); if (first_impure) { - if (write_stack_to_ir(ctx, curr, false) < 0) { - goto error; - } if (clear_locals_type_info(ctx) < 0) { goto error; } } first_impure = false; - if (ir_plain_inst(ctx->ir, *curr) < 0) { - goto error; - } } else { first_impure = true; @@ -1493,244 +1259,26 @@ uop_abstract_interpret( if (status == ABSTRACT_INTERP_ERROR) { goto error; } - else if (status == ABSTRACT_INTERP_GUARD_REQUIRED) { - DPRINTF(3, "GUARD\n"); - // Emit the state of the stack first. - // Since this is a guard, copy over the type info - if (write_stack_to_ir(ctx, curr, true) < 0) { - goto error; - } - if (ir_plain_inst(ctx->ir, *curr) < 0) { - goto error; - } - } curr++; } - ctx->terminating = curr; - if (write_stack_to_ir(ctx, curr, false) < 0) { + assert(op_is_end(curr->opcode)); + if (emit_i(&ctx->emitter, *curr) < 0) { goto error; } - return ctx; - -error: - Py_XDECREF(ctx); - return NULL; -} - -typedef struct _Py_UOpsEmitter { - _PyUOpInstruction *writebuffer; - _PyUOpInstruction *writebuffer_end; - _PyUOpInstruction *writebuffer_true_end; - int curr_i; - int curr_reserve_i; - - int consumed_localsplus_slots; - _Py_UOpsOptIREntry *curr_frame_ir_entry; -} _Py_UOpsEmitter; - -static inline int -emit_i(_Py_UOpsEmitter *emitter, - _PyUOpInstruction inst) -{ -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - if (emitter->curr_i < 0) { - DPRINTF(2, "out of emission space\n"); - return -1; - } - if (emitter->writebuffer + emitter->curr_i >= emitter->writebuffer_end) { - DPRINTF(2, "out of emission space\n"); - return -1; - } - DPRINTF(2, "Emitting instruction at [%d] op: %s, oparg: %d, target: %d, operand: %" PRIu64 " \n", - emitter->curr_i, - (inst.opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[inst.opcode], - inst.oparg, - inst.target, - inst.operand); - emitter->writebuffer[emitter->curr_i] = inst; - emitter->curr_i++; - return 0; -} - - - -static int -count_stack_operands(_Py_UOpsSymbolicExpression *sym) -{ - int total = 0; - for (Py_ssize_t i = 0; i < sym->operand_count; i++) { - if (op_is_stackvalue(sym->operands[i]->inst.opcode)) { - total++; - } - } - return total; -} - -static int -compile_sym_to_uops(_Py_UOpsEmitter *emitter, - _Py_UOpsSymbolicExpression *sym, - _Py_UOpsAbstractInterpContext *ctx) -{ - _PyUOpInstruction inst = sym->inst;; - // Since CPython is a stack machine, just compile in the order - // seen in the operands, then the instruction itself. - - if (op_is_terminal(sym->inst.opcode)) { - // These are for unknown stack entries. - if (op_is_stackvalue(sym->inst.opcode)) { - // Leave it be. These are initial values from the start - return 0; - } - if (sym->inst.opcode == INIT_FAST) { - inst.opcode = LOAD_FAST; - } - return emit_i(emitter, inst); - } - - // Constant propagated value, load immediate constant - if (sym->ty_number->const_val != NULL && !op_is_stackvalue(sym->inst.opcode)) { - // Shrink the stack if operands consist of stack values. - // We don't need them anymore. This could happen because - // the operands first need to be guarded and the guard could not - // be eliminated via constant propagation. - int stack_operands = count_stack_operands(sym); - if (stack_operands) { - inst.opcode = _SHRINK_STACK; - inst.oparg = (int)sym->operand_count; - inst.operand = 0; - if (emit_i(emitter, inst) < 0) { - return -1; - } - } - - inst.opcode = _Py_IsImmortal(sym->ty_number->const_val) - ? 
_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - inst.oparg = sym->inst.oparg; - inst.operand = (uint64_t)Py_NewRef(sym->ty_number->const_val); - return emit_i(emitter, inst); - } - - // Compile each operand - Py_ssize_t operands_count = sym->operand_count; - for (Py_ssize_t i = 0; i < operands_count; i++) { - if (sym->operands[i] == NULL) { - continue; - } - // TODO Py_EnterRecursiveCall ? - if (compile_sym_to_uops( - emitter, - sym->operands[i], - ctx) < 0) { - return -1; - } - } - - // Finally, emit the operation itself. - return emit_i(emitter, sym->inst); -} - -static int -emit_uops_from_ctx( - _Py_UOpsAbstractInterpContext *ctx, - _PyUOpInstruction *trace_writebuffer, - _PyUOpInstruction *writebuffer_end, - int *nop_to -) -{ - -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - - - _Py_UOpsAbstractFrame *root_frame = ctx->frame; - while (root_frame->prev != NULL) { - root_frame = root_frame->prev; - } - _Py_UOpsEmitter emitter = { - trace_writebuffer, - writebuffer_end, - writebuffer_end, - 0, - (int)(writebuffer_end - trace_writebuffer), - 0, - root_frame->frame_ir_entry - }; - - _Py_UOps_Opt_IR *ir = ctx->ir; - int entries = ir->curr_write; - // First entry reserved for the root frame info. - for (int i = 1; i < entries; i++) { - _Py_UOpsOptIREntry *curr = &ir->entries[i]; - switch (curr->typ) { - case IR_SYMBOLIC: { - DPRINTF(3, "symbolic: expr: %s oparg: %d, operand: %p\n", - (curr->expr->inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[curr->expr->inst.opcode], - curr->expr->inst.oparg, - (void *)curr->expr->inst.operand); - if (compile_sym_to_uops(&emitter, curr->expr, ctx) < 0) { - goto error; - } - // Anything less means no assignment target at all. - if (curr->assignment_target >= TARGET_UNUSED) { - _PyUOpInstruction inst = { - curr->assignment_target == TARGET_UNUSED - ? 
POP_TOP : STORE_FAST, - curr->assignment_target, 0, 0}; - if (emit_i(&emitter, inst) < 0) { - goto error; - } - } - break; - } - case IR_PLAIN_INST: { - if (emit_i(&emitter, curr->inst) < 0) { - goto error; - } - break; - } - case IR_FRAME_PUSH_INFO: { - _Py_UOpsOptIREntry *prev = emitter.curr_frame_ir_entry; - emitter.curr_frame_ir_entry = curr; - curr->prev_frame_ir = prev; - break; - } - case IR_FRAME_POP_INFO: { - _Py_UOpsOptIREntry *prev = emitter.curr_frame_ir_entry->prev_frame_ir; - // There will always be the root frame. - assert(prev != NULL); - emitter.curr_frame_ir_entry->prev_frame_ir = NULL; - emitter.curr_frame_ir_entry = prev; - break; - } - case IR_NOP: break; - } - } + Py_DECREF(ctx); - if (emit_i(&emitter, *ctx->terminating) < 0) { - return -1; - } - *nop_to = (int)(emitter.writebuffer_end - emitter.writebuffer); - return emitter.curr_i; + return (int)(curr - trace); error: + Py_XDECREF(ctx); return -1; } + static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { @@ -1827,49 +1375,41 @@ _Py_uop_analyze_and_optimize( { _PyUOpInstruction *temp_writebuffer = NULL; bool err_occurred = false; - _Py_UOpsAbstractInterpContext *ctx = NULL; temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size * OVERALLOCATE_FACTOR); if (temp_writebuffer == NULL) { goto error; } - // Pass: Abstract interpretation and symbolic analysis - ctx = uop_abstract_interpret( - co, buffer, + int new_trace_len = uop_abstract_interpret( + co, buffer, temp_writebuffer, buffer_size, curr_stacklen); - if (ctx == NULL) { + if (new_trace_len < 0) { goto error; } - _PyUOpInstruction *writebuffer_end = temp_writebuffer + buffer_size; - // Compile the SSA IR - int nop_to = 0; - int trace_len = emit_uops_from_ctx( - ctx, - temp_writebuffer, - writebuffer_end, - &nop_to - ); - if (trace_len < 0 || trace_len > buffer_size) { - goto error; - } + peephole_optimizations(temp_writebuffer, new_trace_len); - peephole_optimizations(temp_writebuffer, trace_len); + 
remove_unneeded_uops(temp_writebuffer, new_trace_len); // Fill in our new trace! - memcpy(buffer, temp_writebuffer, buffer_size * sizeof(_PyUOpInstruction)); + memcpy(buffer, temp_writebuffer, new_trace_len * sizeof(_PyUOpInstruction)); PyMem_Free(temp_writebuffer); - Py_DECREF(ctx); - remove_unneeded_uops(buffer, buffer_size); + // _NOP out the rest of the buffer. + + // Fill up the rest of the buffer with NOPs + _PyUOpInstruction *after = buffer + new_trace_len + 1; + while (after < (buffer + buffer_size)) { + after->opcode = _NOP; + after++; + } return 0; error: - Py_XDECREF(ctx); // The only valid error we can raise is MemoryError. // Other times it's not really errors but things like not being able // to fetch a function version because the function got deleted. diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py index 553a92d4b14b42..ed7c800575f5b1 100644 --- a/Tools/cases_generator/tier2_abstract_common.py +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -14,6 +14,7 @@ # Frame stuff "_PUSH_FRAME", "_POP_FRAME", + "_INIT_CALL_PY_EXACT_ARGS", # Bookkeeping "_SET_IP", "_CHECK_VALIDITY", diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index d2f2025ecc3db1..f2e2c88a5348a8 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -51,7 +51,7 @@ def declare_variables( uop: Uop, out: CWriter, - default_type: str = "_Py_UOpsSymbolicExpression *", + default_type: str = "_Py_UOpsSymbolicValue *", skip_inputs: bool = False, skip_peeks: bool = False, ) -> None: @@ -66,7 +66,7 @@ def declare_variables( if var.name not in variables: type = default_type if var.size != "1" and var.type == "PyObject **": - type = "_Py_UOpsSymbolicExpression **" + type = "_Py_UOpsSymbolicValue **" variables.add(var.name) if var.condition: out.emit(f"{type}{var.name} = NULL;\n") @@ -81,7 +81,7 @@ def 
declare_variables( variables.add(var.name) type = default_type if var.size != "1" and var.type == "PyObject **": - type = "_Py_UOpsSymbolicExpression **" + type = "_Py_UOpsSymbolicValue **" if var.condition: out.emit(f"{type}{var.name} = NULL;\n") else: @@ -192,16 +192,10 @@ def get_subexpressions( def new_sym( constant: str | None, - arr_var_size: int | str | None, - arr_var_name: str | None, - subexpresion_count: int | str, - subexpressions: str, ) -> str: return ( - f"_Py_UOpsSymbolicExpression_New(" - f"ctx, *inst, {constant or 'NULL'}, " - f"{arr_var_size or 0}, {arr_var_name or 'NULL'}, " - f"{subexpresion_count} {subexpressions});" + f"_Py_UOpsSymbolicValue_New(" + f"ctx, {constant or 'NULL'}); " ) @@ -219,9 +213,7 @@ def _write_body_abstract_interp_pure_uop( len(uop.stack.outputs) == 1 ), f"Currently we only support 1 stack output for pure ops: {uop}" # uop is mandatory - we cannot const evaluate it - sym = new_sym( - None, arr_var_size, arr_var_name, len(mangled_uop.stack.inputs), subexpressions - ) + sym = new_sym(None) if uop.name in NO_CONST_OR_TYPE_EVALUATE: out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}") return @@ -245,22 +237,19 @@ def _write_body_abstract_interp_pure_uop( out.emit(f"{var.name} = get_const({mangled_var.name});\n") emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) out.emit("\n") - maybe_const_val = new_sym( - f"(PyObject *){uop.stack.outputs[0].name}", - None, - None, - len(mangled_uop.stack.inputs), - subexpressions, - ) + const_val = f"(PyObject *){uop.stack.outputs[0].name}" + maybe_const_val = new_sym(const_val) out.emit(f"{mangled_uop.stack.outputs[0].name} = {maybe_const_val}\n") - + out.emit(f"shrink_stack.oparg = {len(uop.stack.inputs)};\n") + out.emit(f" if (emit_const(&ctx->emitter, {const_val}, shrink_stack) < 0) {{ goto error; }}\n") + out.emit("new_inst.opcode = _NOP;") out.emit("}\n") out.emit("else {\n") - sym = new_sym(None, None, None, len(mangled_uop.stack.inputs), subexpressions) + sym = 
new_sym(None) out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") out.emit("}\n") else: - sym = new_sym(None, None, None, len(mangled_uop.stack.inputs), subexpressions) + sym = new_sym(None) out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) goto error;\n") @@ -281,7 +270,6 @@ def _write_body_abstract_interp_guard_uop( # 1. Attempt to perform guard elimination # 2. Type propagate for guard success if uop.name in NO_CONST_OR_TYPE_EVALUATE: - out.emit("goto guard_required;") return for cache in uop.caches: @@ -310,14 +298,14 @@ def _write_body_abstract_interp_guard_uop( out.emit(f"{var.name} = get_const({mangled_var.name});\n") emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) out.emit("\n") - # Guard elimination - if we are successful, don't add it to the symexpr! + # Guard elimination out.emit('DPRINTF(3, "const eliminated guard\\n");\n') + out.emit("new_inst.opcode = _NOP;") out.emit("break;\n") out.emit("}\n") # Does the input specify typed inputs? 
if not any(output_var.type_prop for output_var in mangled_uop.stack.outputs): - out.emit("goto guard_required;\n") return # If the input types already match, eliminate the guard # Read the cache information to check the auxiliary type information @@ -339,16 +327,17 @@ def _write_body_abstract_interp_guard_uop( aux = "0" if aux is None else aux # Check that the input type information match (including auxiliary info) predicates.append( - f"sym_matches_type((_Py_UOpsSymbolicExpression *){output_var.name}, {typname}, (uint32_t){aux})" + f"sym_matches_type((_Py_UOpsSymbolicValue *){output_var.name}, {typname}, (uint32_t){aux})" ) # Propagate mode - set the types propagates.append( - f"sym_set_type((_Py_UOpsSymbolicExpression *){output_var.name}, {typname}, (uint32_t){aux})" + f"sym_set_type((_Py_UOpsSymbolicValue *){output_var.name}, {typname}, (uint32_t){aux})" ) out.emit("// Type guard elimination\n") out.emit(f"if ({' && '.join(predicates)}){{\n") out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') + out.emit("new_inst.opcode = _NOP;") out.emit("break;\n") out.emit("}\n") # Else we need the guard @@ -356,7 +345,6 @@ def _write_body_abstract_interp_guard_uop( out.emit("// Type propagation\n") for prop in propagates: out.emit(f"{prop};\n") - out.emit("goto guard_required;\n") out.emit("}\n") @@ -431,7 +419,7 @@ def generate_tier2_abstract( if not uop.properties.always_exits: # Guards strictly only peek if not uop.properties.guard: - stack.flush(out, cast_type="_Py_UOpsSymbolicExpression *") + stack.flush(out, cast_type="_Py_UOpsSymbolicValue *") out.emit("break;\n") out.start_line() out.emit("}") From 68298447133ea0ae59d8350101c2252b0bf7d3e5 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 21:03:23 +0800 Subject: [PATCH 039/111] fix error cases --- Python/abstract_interp_cases.c.h | 17 +++++++++++++++++ Python/optimizer_analysis.c | 8 +++++++- .../cases_generator/tier2_abstract_generator.py | 
6 +++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index b9a4c5a63ac3e0..444a60319e9860 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -24,6 +24,7 @@ __value_ = stack_pointer[-1]; __receiver_ = stack_pointer[-2]; __value_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__value_ == NULL) { goto error; } stack_pointer[-2] = __value_; stack_pointer += -1; break; @@ -49,11 +50,13 @@ assert(PyBool_Check(value)); res = Py_IsFalse(value) ? Py_True : Py_False; __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } shrink_stack.oparg = 1; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -179,11 +182,13 @@ if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; // Type propagation @@ -211,11 +216,13 @@ if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; // Type propagation @@ -243,11 +250,13 @@ if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; 
} shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; // Type propagation @@ -306,11 +315,13 @@ ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; // Type propagation @@ -339,11 +350,13 @@ ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; // Type propagation @@ -372,11 +385,13 @@ ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; // Type propagation @@ -434,11 +449,13 @@ if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } 
new_inst.opcode = _NOP;} else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; // Type propagation diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index c87d0711e26217..edf4db45dde600 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -393,8 +393,8 @@ abstractinterp_context_new(PyCodeObject *co, self->t_arena.arena = NULL; self->s_arena.arena = NULL; self->frame_info.arena = NULL; + self->frame = NULL; } - self->frame = NULL; Py_XDECREF(self); Py_XDECREF(frame); return NULL; @@ -876,6 +876,9 @@ emit_const(uops_emitter *emitter, } int load_const_opcode = _Py_IsImmortal(const_val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + if (load_const_opcode == _LOAD_CONST_INLINE) { + Py_INCREF(const_val); + } _PyUOpInstruction load_const = {load_const_opcode, 0, 0, const_val}; if (emit_i(emitter, load_const) < 0) { return -1; @@ -1078,6 +1081,9 @@ uop_abstract_interpret_single_inst( ctx->new_frame_sym.self_or_null = __self_or_null_; ctx->new_frame_sym.args = __args_; __new_frame_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + if (__new_frame_ == NULL) { + goto error; + } stack_pointer[-2 - oparg] = (_Py_UOpsSymbolicValue *)__new_frame_; stack_pointer += -1 - oparg; break; diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index f2e2c88a5348a8..54ccec66152f21 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -215,7 +215,8 @@ def _write_body_abstract_interp_pure_uop( # uop is mandatory - we cannot const evaluate it sym = new_sym(None) if uop.name in NO_CONST_OR_TYPE_EVALUATE: - out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}") + out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") + out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") return # Constant prop only handles one output, 
and no variadic inputs. @@ -240,6 +241,7 @@ def _write_body_abstract_interp_pure_uop( const_val = f"(PyObject *){uop.stack.outputs[0].name}" maybe_const_val = new_sym(const_val) out.emit(f"{mangled_uop.stack.outputs[0].name} = {maybe_const_val}\n") + out.emit(f"if({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") out.emit(f"shrink_stack.oparg = {len(uop.stack.inputs)};\n") out.emit(f" if (emit_const(&ctx->emitter, {const_val}, shrink_stack) < 0) {{ goto error; }}\n") out.emit("new_inst.opcode = _NOP;") @@ -247,10 +249,12 @@ def _write_body_abstract_interp_pure_uop( out.emit("else {\n") sym = new_sym(None) out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") + out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") out.emit("}\n") else: sym = new_sym(None) out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") + out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) goto error;\n") From 1de0ccb5979e719fa62c120bf6e43b997fe43d85 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 21:24:43 +0800 Subject: [PATCH 040/111] fix leak, fix peepholer --- Lib/test/test_capi/test_opt.py | 22 ++++++++++++ Python/optimizer_analysis.c | 62 +++++++++++++++++++++++++--------- 2 files changed, 68 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 05432ec47d8424..49bcc4da53f03c 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -566,6 +566,28 @@ def testfunc(loops): uops = {opname for opname, _, _ in ex} self.assertNotIn("_SHRINK_STACK", uops) + def test_int_constant_propagation_many(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + x + y + x + y + x + y + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with 
temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 4) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) + def test_int_type_propagation(self): def testfunc(loops): num = 0 diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index edf4db45dde600..7f37deac0468a3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -23,6 +23,8 @@ #define OVERALLOCATE_FACTOR 3 +#define PEEPHOLE_MAX_ATTEMPTS 10 + #ifdef Py_DEBUG static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; #define DPRINTF(level, ...) \ @@ -227,6 +229,7 @@ abstractinterp_dealloc(PyObject *o) } PyMem_Free(self->t_arena.arena); PyMem_Free(self->s_arena.arena); + PyMem_Free(self->frame_info.arena); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -1207,7 +1210,7 @@ uop_abstract_interpret_single_inst( } -int +static int uop_abstract_interpret( PyCodeObject *co, _PyUOpInstruction *trace, @@ -1323,9 +1326,35 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } -static void +static inline bool +op_is_zappable(int opcode) +{ + switch(opcode) { + case _SET_IP: + case _CHECK_VALIDITY: + case _LOAD_CONST_INLINE: + case _LOAD_CONST: + case _LOAD_FAST: + case _LOAD_CONST_INLINE_BORROW: + return true; + default: + return false; + } +} + +static inline bool +op_is_load(int opcode) +{ + return (opcode == _LOAD_CONST_INLINE || + opcode == _LOAD_CONST || + opcode == LOAD_FAST || + opcode == _LOAD_CONST_INLINE_BORROW); +} + +static int peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) { + bool done = true; for (int i = 0; i < buffer_size; i++) { _PyUOpInstruction *curr = buffer + i; int oparg = curr->oparg; @@ -1333,24 +1362,20 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) case _SHRINK_STACK: 
{ // If all that precedes a _SHRINK_STACK is a bunch of LOAD_FAST, // then we can safely eliminate that without side effects. - int load_fast_count = 0; + int load_count = 0; _PyUOpInstruction *back = curr-1; - while((back->opcode == _SET_IP || - back->opcode == _CHECK_VALIDITY || - back->opcode == LOAD_FAST) && - load_fast_count < oparg) { - load_fast_count += back->opcode == LOAD_FAST; + while(op_is_zappable(back->opcode) && + load_count < oparg) { + load_count += op_is_load(back->opcode); back--; } - if (load_fast_count == oparg) { + if (load_count == oparg) { + done = false; curr->opcode = NOP; back = curr-1; - load_fast_count = 0; - while((back->opcode == _SET_IP || - back->opcode == _CHECK_VALIDITY || - back->opcode == LOAD_FAST) && - load_fast_count < oparg) { - load_fast_count += back->opcode == LOAD_FAST; + load_count = 0; + while(load_count < oparg) { + load_count += op_is_load(back->opcode); back->opcode = NOP; back--; } @@ -1368,6 +1393,7 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) break; } } + return done; } @@ -1396,7 +1422,11 @@ _Py_uop_analyze_and_optimize( goto error; } - peephole_optimizations(temp_writebuffer, new_trace_len); + for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && + !peephole_optimizations(temp_writebuffer, new_trace_len); + peephole_attempts++) { + + } remove_unneeded_uops(temp_writebuffer, new_trace_len); From d2917b78183a5ad57a24134f830daef943a31332 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 21:32:42 +0800 Subject: [PATCH 041/111] documentation, cleanup whitespace --- Python/abstract_interp_cases.c.h | 34 +++++++++---------- Python/optimizer_analysis.c | 11 ++++++ .../tier2_abstract_generator.py | 2 +- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 444a60319e9860..c2c2102d99f66c 100644 --- 
a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -23,7 +23,7 @@ _Py_UOpsSymbolicValue *__receiver_; __value_ = stack_pointer[-1]; __receiver_ = stack_pointer[-2]; - __value_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __value_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__value_ == NULL) { goto error; } stack_pointer[-2] = __value_; stack_pointer += -1; @@ -49,13 +49,13 @@ value = get_const(__value_); assert(PyBool_Check(value)); res = Py_IsFalse(value) ? Py_True : Py_False; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 1; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -181,13 +181,13 @@ res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -215,13 +215,13 @@ res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = 
_Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -249,13 +249,13 @@ res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -314,13 +314,13 @@ ((PyFloatObject *)left)->ob_fval * ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -349,13 +349,13 @@ ((PyFloatObject *)left)->ob_fval + ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -384,13 +384,13 @@ 
((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -448,13 +448,13 @@ res = PyUnicode_Concat(left, right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP;} else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7f37deac0468a3..70e3f76b30a586 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1,3 +1,14 @@ +/* + * This file contains the optimizer for CPython uops. + * It performs a traditional data-flow analysis[1] over the trace of uops. + * Using the information gained, it chooses to emit, or skip certain instructions + * if possible. + * + * [1] For information on data-flow analysis, please see page 27 onwards in + * https://ilyasergey.net/CS4212/_static/lectures/PLDI-Week-12-dataflow.pdf + * Credits to the courses UPenn Compilers (CIS 341) and NUS Compiler Design (CS4212). 
+ * + * */ #include "Python.h" #include "pycore_interp.h" #include "pycore_opcode_metadata.h" diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 54ccec66152f21..c30691893aec86 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -195,7 +195,7 @@ def new_sym( ) -> str: return ( f"_Py_UOpsSymbolicValue_New(" - f"ctx, {constant or 'NULL'}); " + f"ctx, {constant or 'NULL'});" ) From 553ac53d3ca256d64b7cf712ad6aca23976cb2f5 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 21:47:28 +0800 Subject: [PATCH 042/111] reduce mem usage --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 70e3f76b30a586..856134da019780 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1419,7 +1419,7 @@ _Py_uop_analyze_and_optimize( _PyUOpInstruction *temp_writebuffer = NULL; bool err_occurred = false; - temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size * OVERALLOCATE_FACTOR); + temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size); if (temp_writebuffer == NULL) { goto error; } From 4555b0c0bcda9656a7630cc8223b66598a83260a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 21:51:37 +0800 Subject: [PATCH 043/111] remove INIT_FAST add ignored --- Include/internal/pycore_uop_ids.h | 5 ++--- Include/internal/pycore_uop_metadata.h | 2 -- Python/abstract_interp_cases.c.h | 4 ---- Python/bytecodes.c | 5 ----- Python/executor_cases.c.h | 5 ----- Tools/c-analyzer/cpython/ignored.tsv | 2 +- 6 files changed, 3 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 3975e86b984263..ff09dfe5fa4d32 100644 --- 
a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -233,9 +233,8 @@ extern "C" { #define _LOAD_CONST_INLINE 380 #define _LOAD_CONST_INLINE_BORROW 381 #define _INTERNAL_INCREMENT_OPT_COUNTER 382 -#define INIT_FAST 383 -#define _SHRINK_STACK 384 -#define MAX_UOP_ID 384 +#define _SHRINK_STACK 383 +#define MAX_UOP_ID 383 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 207333f0e38c4c..740d2ed4d19a81 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -205,12 +205,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE] = 0, [_LOAD_CONST_INLINE_BORROW] = 0, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, - [INIT_FAST] = 0, [_SHRINK_STACK] = HAS_ARG_FLAG, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { - [INIT_FAST] = "INIT_FAST", [_BEFORE_ASYNC_WITH] = "_BEFORE_ASYNC_WITH", [_BEFORE_WITH] = "_BEFORE_WITH", [_BINARY_OP] = "_BINARY_OP", diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index c2c2102d99f66c..b567559fe0953f 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1964,10 +1964,6 @@ break; } - case INIT_FAST: { - break; - } - case _SHRINK_STACK: { stack_pointer += -oparg; break; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index fbf6cf30d98efd..043d83bb06d3d0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4084,11 +4084,6 @@ dummy_func( exe->count++; } - // Represents a possibly uninitialized value in the abstract interpreter. - op(INIT_FAST, (--)) { - // Nothing, just a sentinel. - } - op(_SHRINK_STACK, (args[oparg] --)) { DECREF_INPUTS(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9ef2e5d1c39d73..10568aa2615346 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3417,11 +3417,6 @@ break; } - case INIT_FAST: { - // Nothing, just a sentinel. 
- break; - } - case _SHRINK_STACK: { PyObject **args; oparg = CURRENT_OPARG(); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 2f9e80d6ab6737..ad35d36e5e8093 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -733,6 +733,6 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - -Python/optimizer_analysis.c - _Py_PartitionRootNode_Type - +Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type - Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type - Modules/clinic/md5module.c.h _md5_md5 _keywords - From 4155d848b304c2761db8b2e89da3644738eb3c17 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 21:59:57 +0800 Subject: [PATCH 044/111] cleanup --- Python/optimizer_analysis.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 856134da019780..a4b19b69784766 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -44,25 +44,6 @@ #define DPRINTF(level, ...) #endif -// This represents a value that "terminates" the symbolic. -static inline bool -op_is_terminal(uint32_t opcode) -{ - return (opcode == _LOAD_FAST || - opcode == _LOAD_FAST_CHECK || - opcode == _LOAD_FAST_AND_CLEAR || - opcode == INIT_FAST || - opcode == CACHE || - opcode == PUSH_NULL); -} - -// This represents a value that is already on the stack. -static inline bool -op_is_stackvalue(uint32_t opcode) -{ - return (opcode == CACHE); -} - // See the interpreter DSL in ./Tools/cases_generator/interpreter_definition.md for what these correspond to. 
typedef enum { // Types with refinement info @@ -878,13 +859,6 @@ emit_const(uops_emitter *emitter, PyObject *const_val, _PyUOpInstruction shrink_stack) { -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif if (emit_i(emitter, shrink_stack) < 0) { return -1; } @@ -893,7 +867,7 @@ emit_const(uops_emitter *emitter, if (load_const_opcode == _LOAD_CONST_INLINE) { Py_INCREF(const_val); } - _PyUOpInstruction load_const = {load_const_opcode, 0, 0, const_val}; + _PyUOpInstruction load_const = {load_const_opcode, 0, 0, (uintptr_t)const_val}; if (emit_i(emitter, load_const) < 0) { return -1; } @@ -1007,8 +981,6 @@ uop_abstract_interpret_single_inst( _Py_UOpsSymbolicValue * local = GETLOCAL(oparg); // Might be NULL - replace with LOAD_FAST_CHECK if (sym_is_type(local, NULL_TYPE)) { - _PyUOpInstruction temp = *inst; - temp.opcode = LOAD_FAST_CHECK; _Py_UOpsSymbolicValue * new_local = sym_init_unknown(ctx); if (new_local == NULL) { goto error; From e48a794e882d2b2800e05ac3265bafd85b8e267f Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 23:13:06 +0800 Subject: [PATCH 045/111] Add back peephole for constants --- Python/optimizer_analysis.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a4b19b69784766..09185a8e29c6a0 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1011,7 +1011,14 @@ uop_abstract_interpret_single_inst( STACK_GROW(1); PEEK(1) = (_Py_UOpsSymbolicValue *)GETITEM( ctx, oparg); - assert(PEEK(1)->ty_number->const_val != NULL); + assert(is_const(PEEK(1))); + // Peephole: inline constants. + PyObject *val = get_const(PEEK(1)); + new_inst.opcode = _Py_IsImmortal(val) ? 
_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + if (new_inst.opcode == _LOAD_CONST_INLINE) { + Py_INCREF(val); + } + new_inst.operand = val; break; } case STORE_FAST_MAYBE_NULL: From 7e7ba2db1b76a6c5ece3a6d0ff937303e321beda Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 23:17:41 +0800 Subject: [PATCH 046/111] fix compiler warning --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 09185a8e29c6a0..1f395511ee187b 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1018,7 +1018,7 @@ uop_abstract_interpret_single_inst( if (new_inst.opcode == _LOAD_CONST_INLINE) { Py_INCREF(val); } - new_inst.operand = val; + new_inst.operand = (uintptr_t)val; break; } case STORE_FAST_MAYBE_NULL: From 9aa7ccdd3849a1abb924df8f00047cd3d59901e6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 25 Jan 2024 23:24:48 +0800 Subject: [PATCH 047/111] cut the constant factor --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 1f395511ee187b..83c36fe629b0ea 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -34,7 +34,7 @@ #define OVERALLOCATE_FACTOR 3 -#define PEEPHOLE_MAX_ATTEMPTS 10 +#define PEEPHOLE_MAX_ATTEMPTS 5 #ifdef Py_DEBUG static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; From d3d1e609624347d261d9b686247fb3bcc34fc23a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:09:45 +0800 Subject: [PATCH 048/111] loop peeling --- Include/internal/pycore_uop_ids.h | 13 +- Include/internal/pycore_uop_metadata.h | 2 + Lib/test/test_capi/test_opt.py | 640 ++++++++++++------------- Python/abstract_interp_cases.c.h | 4 + Python/bytecodes.c | 5 + 
Python/executor_cases.c.h | 7 + Python/optimizer.c | 3 +- Python/optimizer_analysis.c | 32 +- Python/pylifecycle.c | 2 +- 9 files changed, 376 insertions(+), 332 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index ff09dfe5fa4d32..8222bb12bcf66a 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -229,12 +229,13 @@ extern "C" { #define _GUARD_IS_NOT_NONE_POP 376 #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 -#define _CHECK_VALIDITY 379 -#define _LOAD_CONST_INLINE 380 -#define _LOAD_CONST_INLINE_BORROW 381 -#define _INTERNAL_INCREMENT_OPT_COUNTER 382 -#define _SHRINK_STACK 383 -#define MAX_UOP_ID 383 +#define _JUMP_ABSOLUTE 379 +#define _CHECK_VALIDITY 380 +#define _LOAD_CONST_INLINE 381 +#define _LOAD_CONST_INLINE_BORROW 382 +#define _INTERNAL_INCREMENT_OPT_COUNTER 383 +#define _SHRINK_STACK 384 +#define MAX_UOP_ID 384 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 740d2ed4d19a81..e33703440e368b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -201,6 +201,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG | HAS_SPECIAL_OPT_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, + [_JUMP_ABSOLUTE] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE] = 0, [_LOAD_CONST_INLINE_BORROW] = 0, @@ -314,6 +315,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", + [_JUMP_ABSOLUTE] = "_JUMP_ABSOLUTE", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_LIST_APPEND] = "_LIST_APPEND", [_LIST_EXTEND] = "_LIST_EXTEND", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 
49bcc4da53f03c..46768c2f7b3172 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -544,49 +544,49 @@ def testfunc(n): class TestUopsOptimization(unittest.TestCase): - def test_int_constant_propagation(self): - def testfunc(loops): - num = 0 - for _ in range(loops): - x = 0 - y = 1 - a = x + y - return 1 - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 1) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 0) - uops = {opname for opname, _, _ in ex} - self.assertNotIn("_SHRINK_STACK", uops) - - def test_int_constant_propagation_many(self): - def testfunc(loops): - num = 0 - for _ in range(loops): - x = 0 - y = 1 - a = x + y + x + y + x + y + x + y - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 4) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 0) - uops = {opname for opname, _, _ in ex} - self.assertNotIn("_SHRINK_STACK", uops) + # def test_int_constant_propagation(self): + # def testfunc(loops): + # num = 0 + # for _ in range(loops): + # x = 0 + # y = 1 + # a = x + y + # return 1 + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # self.assertEqual(res, 1) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 0) + # uops = {opname for opname, _, _ in ex} + # self.assertNotIn("_SHRINK_STACK", uops) + # + # def test_int_constant_propagation_many(self): + # def 
testfunc(loops): + # num = 0 + # for _ in range(loops): + # x = 0 + # y = 1 + # a = x + y + x + y + x + y + x + y + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # self.assertEqual(res, 4) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 0) + # uops = {opname for opname, _, _ in ex} + # self.assertNotIn("_SHRINK_STACK", uops) def test_int_type_propagation(self): def testfunc(loops): @@ -609,283 +609,283 @@ def testfunc(loops): guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] self.assertEqual(len(binop_count), 3) self.assertEqual(len(guard_both_int_count), 1) - - def test_int_impure_region(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num - y = 1 - x // 2 - a = x + y - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - - def test_int_impure_region_attr(self): - class A: - foo = 1 - def testfunc(loops): - num = 0 - while num < loops: - x = A.foo + A.foo - y = 1 - A.foo - a = x + y - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) - - def test_call_constant_propagate_past_impure(self): - def testfunc(n): - for i in range(n): - x = 1 - y = 1 - x // y - z = x + y - return z - - opt = _testinternalcapi.get_uop_optimizer() - with 
temporary_optimizer(opt): - res = testfunc(20) - - ex = get_first_executor(testfunc) - self.assertEqual(res, 2) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - - def test_int_large_pure_region(self): - def testfunc(loops): - num = 0 - while num < loops: - x = num + num + num - num + num - num + num + num + num - num + num - num - y = 1 - a = x + num + num + num - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 11) - - def test_call_py_exact_args(self): - def testfunc(n): - def dummy(x): - return x+1 - for i in range(n): - dummy(i) - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(20) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_PUSH_FRAME", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) - - def test_frame_instance_method(self): - class A: - def __init__(self): - self.a = 1 - def foo(self): - return self.a - - a = A() - def testfunc(n): - for i in range(n): - a.foo() - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(32) - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) - - def test_frame_class_method(self): - class A: - def __init__(self): - self.a = 1 - def foo(self): - return self.a - - def testfunc(n): - a = A() - for i in range(n): - A.foo(a) - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(32) - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - 
self.assertIn("_LOAD_ATTR_CLASS", uops) - - def test_call_constant_propagate_in_frame(self): - def testfunc(n): - def dummy(): - x = 1 - y = 1 - return x+y - for i in range(n): - x = dummy() - return x - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - res = testfunc(20) - - ex = get_first_executor(testfunc) - self.assertEqual(res, 2) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_PUSH_FRAME", uops) - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - - def test_call_constant_propagate_through_frame(self): - def testfunc(n): - def dummy(x): - return x+1 - for i in range(n): - x = dummy(3) - return x - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - res = testfunc(20) - - ex = get_first_executor(testfunc) - self.assertEqual(res, 4) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_PUSH_FRAME", uops) - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - - def test_int_type_propagate_through_range(self): - def testfunc(n): - - for i in range(n): - x = i + i - return x - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - res = testfunc(20) - - ex = get_first_executor(testfunc) - self.assertEqual(res, 19 * 2) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertNotIn("_GUARD_BOTH_INT", uops) - - def test_int_value_nubmering(self): - def testfunc(n): - - y = 1 - for i in range(n): - x = y - z = x - a = z - b = a - res = x + z + a + b - return res - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - res = testfunc(20) - - ex = get_first_executor(testfunc) - self.assertEqual(res, 4) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_GUARD_BOTH_INT", uops) - guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] - self.assertEqual(len(guard_count), 1) - - def test_comprehension(self): - def testfunc(n): 
- for _ in range(n): - return [i for i in range(n)] - - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(20) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - - def test_truncated_zipfile(self): - import io - import zipfile - from random import random - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - FIXEDTEST_SIZE = 1000 - line_gen = [bytes("Zipfile test line %d. random float: %f\n" % - (i, random()), "ascii") - for i in range(FIXEDTEST_SIZE)] - - data = b''.join(line_gen) - compression = zipfile.ZIP_DEFLATED - fp = io.BytesIO() - with zipfile.ZipFile(fp, mode='w') as zipf: - zipf.writestr('strfile', data, compress_type=compression) - end_offset = fp.tell() - zipfiledata = fp.getvalue() - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - zipopen.read() - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - while zipopen.read(100): - pass - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - while zipopen.read1(100): - pass + # + # def test_int_impure_region(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + # y = 1 + # x // 2 + # a = x + y + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # + # 
def test_int_impure_region_attr(self): + # class A: + # foo = 1 + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = A.foo + A.foo + # y = 1 + # A.foo + # a = x + y + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 3) + # + # def test_call_constant_propagate_past_impure(self): + # def testfunc(n): + # for i in range(n): + # x = 1 + # y = 1 + # x // y + # z = x + y + # return z + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # res = testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertEqual(res, 2) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + # + # def test_int_large_pure_region(self): + # def testfunc(loops): + # num = 0 + # while num < loops: + # x = num + num + num - num + num - num + num + num + num - num + num - num + # y = 1 + # a = x + num + num + num + # num += 1 + # return a + # + # opt = _testinternalcapi.get_uop_optimizer() + # res = None + # with temporary_optimizer(opt): + # res = testfunc(64) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + # self.assertEqual(len(binop_count), 11) + # + # def test_call_py_exact_args(self): + # def testfunc(n): + # def dummy(x): + # return x+1 + # for i in range(n): + # dummy(i) + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_PUSH_FRAME", uops) + # self.assertIn("_BINARY_OP_ADD_INT", uops) 
+ # + # def test_frame_instance_method(self): + # class A: + # def __init__(self): + # self.a = 1 + # def foo(self): + # return self.a + # + # a = A() + # def testfunc(n): + # for i in range(n): + # a.foo() + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(32) + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + # + # def test_frame_class_method(self): + # class A: + # def __init__(self): + # self.a = 1 + # def foo(self): + # return self.a + # + # def testfunc(n): + # a = A() + # for i in range(n): + # A.foo(a) + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(32) + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_LOAD_ATTR_CLASS", uops) + # + # def test_call_constant_propagate_in_frame(self): + # def testfunc(n): + # def dummy(): + # x = 1 + # y = 1 + # return x+y + # for i in range(n): + # x = dummy() + # return x + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # res = testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertEqual(res, 2) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_PUSH_FRAME", uops) + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + # + # def test_call_constant_propagate_through_frame(self): + # def testfunc(n): + # def dummy(x): + # return x+1 + # for i in range(n): + # x = dummy(3) + # return x + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # res = testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertEqual(res, 4) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_PUSH_FRAME", uops) + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + # + # def 
test_int_type_propagate_through_range(self): + # def testfunc(n): + # + # for i in range(n): + # x = i + i + # return x + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # res = testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertEqual(res, 19 * 2) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertNotIn("_GUARD_BOTH_INT", uops) + # + # def test_int_value_nubmering(self): + # def testfunc(n): + # + # y = 1 + # for i in range(n): + # x = y + # z = x + # a = z + # b = a + # res = x + z + a + b + # return res + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # res = testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertEqual(res, 4) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertIn("_GUARD_BOTH_INT", uops) + # guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + # self.assertEqual(len(guard_count), 1) + # + # def test_comprehension(self): + # def testfunc(n): + # for _ in range(n): + # return [i for i in range(n)] + # + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # testfunc(20) + # + # ex = get_first_executor(testfunc) + # self.assertIsNotNone(ex) + # uops = {opname for opname, _, _ in ex} + # self.assertNotIn("_BINARY_OP_ADD_INT", uops) + # + # def test_truncated_zipfile(self): + # import io + # import zipfile + # from random import random + # opt = _testinternalcapi.get_uop_optimizer() + # with temporary_optimizer(opt): + # FIXEDTEST_SIZE = 1000 + # line_gen = [bytes("Zipfile test line %d. 
random float: %f\n" % + # (i, random()), "ascii") + # for i in range(FIXEDTEST_SIZE)] + # + # data = b''.join(line_gen) + # compression = zipfile.ZIP_DEFLATED + # fp = io.BytesIO() + # with zipfile.ZipFile(fp, mode='w') as zipf: + # zipf.writestr('strfile', data, compress_type=compression) + # end_offset = fp.tell() + # zipfiledata = fp.getvalue() + # + # fp = io.BytesIO(zipfiledata) + # with zipfile.ZipFile(fp) as zipf: + # with zipf.open('strfile') as zipopen: + # fp.truncate(end_offset - 20) + # with self.assertRaises(EOFError): + # zipopen.read() + # + # fp = io.BytesIO(zipfiledata) + # with zipfile.ZipFile(fp) as zipf: + # with zipf.open('strfile') as zipopen: + # fp.truncate(end_offset - 20) + # with self.assertRaises(EOFError): + # while zipopen.read(100): + # pass + # + # fp = io.BytesIO(zipfiledata) + # with zipfile.ZipFile(fp) as zipf: + # with zipf.open('strfile') as zipopen: + # fp.truncate(end_offset - 20) + # with self.assertRaises(EOFError): + # while zipopen.read1(100): + # pass diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index b567559fe0953f..43bac9cb4f0733 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1941,6 +1941,10 @@ break; } + case _JUMP_ABSOLUTE: { + break; + } + case _LOAD_CONST_INLINE: { _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 043d83bb06d3d0..c8670d91fd699d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4065,6 +4065,11 @@ dummy_func( DEOPT_IF(1); } + op(_JUMP_ABSOLUTE, (--)) { + next_uop = current_executor->trace + oparg; + CHECK_EVAL_BREAKER(); + } + op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->vm_data.valid); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 10568aa2615346..674c6339904e7d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3384,6 +3384,13 @@ break; } + case _JUMP_ABSOLUTE: { 
+ oparg = CURRENT_OPARG(); + next_uop = current_executor->trace + oparg; + CHECK_EVAL_BREAKER(); + break; + } + case _CHECK_VALIDITY: { TIER_TWO_ONLY if (!current_executor->vm_data.valid) goto deoptimize; diff --git a/Python/optimizer.c b/Python/optimizer.c index 044a47dda4680c..9f5b22bbdc2dbf 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -776,7 +776,8 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) executor->trace[dest] = buffer[i]; int opcode = buffer[i].opcode; if (opcode == _POP_JUMP_IF_FALSE || - opcode == _POP_JUMP_IF_TRUE) + opcode == _POP_JUMP_IF_TRUE || + opcode == _JUMP_ABSOLUTE) { /* The oparg of the target will already have been set to its new offset */ int oparg = executor->trace[dest].oparg; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 83c36fe629b0ea..78c8f39f79de4a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1217,7 +1217,8 @@ uop_abstract_interpret( lltrace = *uop_debug - '0'; // TODO: Parse an int and all that } #endif - // Initialize the symbolic consts + bool did_loop_peel = false; + int loop_peel_target = 0; _Py_UOpsAbstractInterpContext *ctx = NULL; @@ -1228,6 +1229,7 @@ uop_abstract_interpret( goto error; } +loop_peeling: _PyUOpInstruction *curr = trace; _PyUOpInstruction *end = trace + trace_len; AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; @@ -1264,13 +1266,35 @@ uop_abstract_interpret( } assert(op_is_end(curr->opcode)); - if (emit_i(&ctx->emitter, *curr) < 0) { - goto error; + + // If we end in a loop, and we have a lot of space left, unroll the loop for added type stability + // https://en.wikipedia.org/wiki/Loop_unrolling + if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && + ((ctx->emitter.curr_i * 2) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { + did_loop_peel = true; + loop_peel_target = ctx->emitter.curr_i; + DPRINTF(2, "loop_peeling!\n"); + goto loop_peeling; + } + else { + if 
(did_loop_peel) { + assert(curr->opcode == _JUMP_TO_TOP); + _PyUOpInstruction jump_rel = {_JUMP_ABSOLUTE, (ctx->emitter.curr_i), 0, 0}; + if (emit_i(&ctx->emitter, jump_rel) < 0) { + goto error; + } + } else { + if (emit_i(&ctx->emitter, *curr) < 0) { + goto error; + } + } } + + int res = ctx->emitter.curr_i; Py_DECREF(ctx); - return (int)(curr - trace); + return res; error: Py_XDECREF(ctx); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index c7666c7b142d95..f314710c773896 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1231,7 +1231,7 @@ init_interp_main(PyThreadState *tstate) if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } - enabled = 1; // TEMPORARY: always enable + // enabled = 1; // TEMPORARY: always enable if (enabled) { PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer(); if (opt == NULL) { From d8d82e0ec1b4029b397c18e5562ad65b79cd078d Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:27:28 +0800 Subject: [PATCH 049/111] loop unrolling done --- Include/internal/pycore_uop_ids.h | 13 +- Include/internal/pycore_uop_metadata.h | 2 + Lib/test/test_capi/test_opt.py | 642 ++++++++++++------------- Python/abstract_interp_cases.c.h | 4 + Python/bytecodes.c | 3 + Python/executor_cases.c.h | 4 + Python/optimizer.c | 11 +- Python/optimizer_analysis.c | 10 +- 8 files changed, 358 insertions(+), 331 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8222bb12bcf66a..385a85881c2c48 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -230,12 +230,13 @@ extern "C" { #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 #define _JUMP_ABSOLUTE 379 -#define _CHECK_VALIDITY 380 -#define _LOAD_CONST_INLINE 381 -#define _LOAD_CONST_INLINE_BORROW 382 -#define _INTERNAL_INCREMENT_OPT_COUNTER 383 -#define _SHRINK_STACK 384 -#define MAX_UOP_ID 384 +#define 
_JUMP_ABSOLUTE_HEADER 380 +#define _CHECK_VALIDITY 381 +#define _LOAD_CONST_INLINE 382 +#define _LOAD_CONST_INLINE_BORROW 383 +#define _INTERNAL_INCREMENT_OPT_COUNTER 384 +#define _SHRINK_STACK 385 +#define MAX_UOP_ID 385 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index e33703440e368b..bd2a3ec925a68b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -202,6 +202,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG | HAS_SPECIAL_OPT_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_JUMP_ABSOLUTE] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG, + [_JUMP_ABSOLUTE_HEADER] = 0, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE] = 0, [_LOAD_CONST_INLINE_BORROW] = 0, @@ -316,6 +317,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", [_JUMP_ABSOLUTE] = "_JUMP_ABSOLUTE", + [_JUMP_ABSOLUTE_HEADER] = "_JUMP_ABSOLUTE_HEADER", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_LIST_APPEND] = "_LIST_APPEND", [_LIST_EXTEND] = "_LIST_EXTEND", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 46768c2f7b3172..ec5858a7aff605 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -544,49 +544,49 @@ def testfunc(n): class TestUopsOptimization(unittest.TestCase): - # def test_int_constant_propagation(self): - # def testfunc(loops): - # num = 0 - # for _ in range(loops): - # x = 0 - # y = 1 - # a = x + y - # return 1 - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # self.assertEqual(res, 1) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 0) - # uops = {opname for opname, 
_, _ in ex} - # self.assertNotIn("_SHRINK_STACK", uops) - # - # def test_int_constant_propagation_many(self): - # def testfunc(loops): - # num = 0 - # for _ in range(loops): - # x = 0 - # y = 1 - # a = x + y + x + y + x + y + x + y - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # self.assertEqual(res, 4) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 0) - # uops = {opname for opname, _, _ in ex} - # self.assertNotIn("_SHRINK_STACK", uops) + def test_int_constant_propagation(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) + + def test_int_constant_propagation_many(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + x + y + x + y + x + y + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 4) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) def test_int_type_propagation(self): def testfunc(loops): @@ -607,285 +607,285 @@ def testfunc(loops): self.assertEqual(res, 63) binop_count = [opname for opname, _, _ in ex if 
opname == "_BINARY_OP_ADD_INT"] guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] - self.assertEqual(len(binop_count), 3) + self.assertGreaterEqual(len(binop_count), 3) self.assertEqual(len(guard_both_int_count), 1) - # - # def test_int_impure_region(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num - # y = 1 - # x // 2 - # a = x + y - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # - # def test_int_impure_region_attr(self): - # class A: - # foo = 1 - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = A.foo + A.foo - # y = 1 - # A.foo - # a = x + y - # num += 1 - # return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 3) - # - # def test_call_constant_propagate_past_impure(self): - # def testfunc(n): - # for i in range(n): - # x = 1 - # y = 1 - # x // y - # z = x + y - # return z - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # res = testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertEqual(res, 2) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) - # - # def test_int_large_pure_region(self): - # def testfunc(loops): - # num = 0 - # while num < loops: - # x = num + num + num - num + num - num + num + num + num - num + num - num - # y = 1 - # a = x + num + num + num - # num += 1 - # 
return a - # - # opt = _testinternalcapi.get_uop_optimizer() - # res = None - # with temporary_optimizer(opt): - # res = testfunc(64) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - # self.assertEqual(len(binop_count), 11) - # - # def test_call_py_exact_args(self): - # def testfunc(n): - # def dummy(x): - # return x+1 - # for i in range(n): - # dummy(i) - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_PUSH_FRAME", uops) - # self.assertIn("_BINARY_OP_ADD_INT", uops) - # - # def test_frame_instance_method(self): - # class A: - # def __init__(self): - # self.a = 1 - # def foo(self): - # return self.a - # - # a = A() - # def testfunc(n): - # for i in range(n): - # a.foo() - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(32) - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) - # - # def test_frame_class_method(self): - # class A: - # def __init__(self): - # self.a = 1 - # def foo(self): - # return self.a - # - # def testfunc(n): - # a = A() - # for i in range(n): - # A.foo(a) - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # testfunc(32) - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_LOAD_ATTR_CLASS", uops) - # - # def test_call_constant_propagate_in_frame(self): - # def testfunc(n): - # def dummy(): - # x = 1 - # y = 1 - # return x+y - # for i in range(n): - # x = dummy() - # return x - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # res = 
testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertEqual(res, 2) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_PUSH_FRAME", uops) - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) - # - # def test_call_constant_propagate_through_frame(self): - # def testfunc(n): - # def dummy(x): - # return x+1 - # for i in range(n): - # x = dummy(3) - # return x - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # res = testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertEqual(res, 4) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_PUSH_FRAME", uops) - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) - # - # def test_int_type_propagate_through_range(self): - # def testfunc(n): - # - # for i in range(n): - # x = i + i - # return x - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # res = testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertEqual(res, 19 * 2) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertNotIn("_GUARD_BOTH_INT", uops) - # - # def test_int_value_nubmering(self): - # def testfunc(n): - # - # y = 1 - # for i in range(n): - # x = y - # z = x - # a = z - # b = a - # res = x + z + a + b - # return res - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # res = testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertEqual(res, 4) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertIn("_GUARD_BOTH_INT", uops) - # guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] - # self.assertEqual(len(guard_count), 1) - # - # def test_comprehension(self): - # def testfunc(n): - # for _ in range(n): - # return [i for i in range(n)] - # - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # 
testfunc(20) - # - # ex = get_first_executor(testfunc) - # self.assertIsNotNone(ex) - # uops = {opname for opname, _, _ in ex} - # self.assertNotIn("_BINARY_OP_ADD_INT", uops) - # - # def test_truncated_zipfile(self): - # import io - # import zipfile - # from random import random - # opt = _testinternalcapi.get_uop_optimizer() - # with temporary_optimizer(opt): - # FIXEDTEST_SIZE = 1000 - # line_gen = [bytes("Zipfile test line %d. random float: %f\n" % - # (i, random()), "ascii") - # for i in range(FIXEDTEST_SIZE)] - # - # data = b''.join(line_gen) - # compression = zipfile.ZIP_DEFLATED - # fp = io.BytesIO() - # with zipfile.ZipFile(fp, mode='w') as zipf: - # zipf.writestr('strfile', data, compress_type=compression) - # end_offset = fp.tell() - # zipfiledata = fp.getvalue() - # - # fp = io.BytesIO(zipfiledata) - # with zipfile.ZipFile(fp) as zipf: - # with zipf.open('strfile') as zipopen: - # fp.truncate(end_offset - 20) - # with self.assertRaises(EOFError): - # zipopen.read() - # - # fp = io.BytesIO(zipfiledata) - # with zipfile.ZipFile(fp) as zipf: - # with zipf.open('strfile') as zipopen: - # fp.truncate(end_offset - 20) - # with self.assertRaises(EOFError): - # while zipopen.read(100): - # pass - # - # fp = io.BytesIO(zipfiledata) - # with zipfile.ZipFile(fp) as zipf: - # with zipf.open('strfile') as zipopen: - # fp.truncate(end_offset - 20) - # with self.assertRaises(EOFError): - # while zipopen.read1(100): - # pass + + def test_int_impure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + y = 1 + x // 2 + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 3) + + def test_int_impure_region_attr(self): + class A: + foo = 1 + def testfunc(loops): 
+ num = 0 + while num < loops: + x = A.foo + A.foo + y = 1 + A.foo + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 3) + + def test_call_constant_propagate_past_impure(self): + def testfunc(n): + for i in range(n): + x = 1 + y = 1 + x // y + z = x + y + return z + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_int_large_pure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + num - num + num - num + num + num + num - num + num - num + y = 1 + a = x + num + num + num + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 11) + + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + + def test_frame_instance_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + a = A() + def testfunc(n): + for i in range(n): + a.foo() + + opt = 
_testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + + def test_frame_class_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + def testfunc(n): + a = A() + for i in range(n): + A.foo(a) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_CLASS", uops) + + def test_call_constant_propagate_in_frame(self): + def testfunc(n): + def dummy(): + x = 1 + y = 1 + return x+y + for i in range(n): + x = dummy() + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_call_constant_propagate_through_frame(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + x = dummy(3) + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_int_type_propagate_through_range(self): + def testfunc(n): + + for i in range(n): + x = i + i + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 19 * 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + 
self.assertNotIn("_GUARD_BOTH_INT", uops) + + def test_int_value_nubmering(self): + def testfunc(n): + + y = 1 + for i in range(n): + x = y + z = x + a = z + b = a + res = x + z + a + b + return res + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_GUARD_BOTH_INT", uops) + guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertEqual(len(guard_count), 1) + + def test_comprehension(self): + def testfunc(n): + for _ in range(n): + return [i for i in range(n)] + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_truncated_zipfile(self): + import io + import zipfile + from random import random + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + FIXEDTEST_SIZE = 1000 + line_gen = [bytes("Zipfile test line %d. 
random float: %f\n" % + (i, random()), "ascii") + for i in range(FIXEDTEST_SIZE)] + + data = b''.join(line_gen) + compression = zipfile.ZIP_DEFLATED + fp = io.BytesIO() + with zipfile.ZipFile(fp, mode='w') as zipf: + zipf.writestr('strfile', data, compress_type=compression) + end_offset = fp.tell() + zipfiledata = fp.getvalue() + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + zipopen.read() + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + while zipopen.read(100): + pass + + fp = io.BytesIO(zipfiledata) + with zipfile.ZipFile(fp) as zipf: + with zipf.open('strfile') as zipopen: + fp.truncate(end_offset - 20) + with self.assertRaises(EOFError): + while zipopen.read1(100): + pass diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 43bac9cb4f0733..8943cc5fec68f4 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1945,6 +1945,10 @@ break; } + case _JUMP_ABSOLUTE_HEADER: { + break; + } + case _LOAD_CONST_INLINE: { _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c8670d91fd699d..2de308720fb163 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4070,6 +4070,9 @@ dummy_func( CHECK_EVAL_BREAKER(); } + op(_JUMP_ABSOLUTE_HEADER, (--)) { + } + op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->vm_data.valid); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 674c6339904e7d..ef4542b48bee98 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3391,6 +3391,10 @@ break; } + case _JUMP_ABSOLUTE_HEADER: { + break; + } + case _CHECK_VALIDITY: { TIER_TWO_ONLY if (!current_executor->vm_data.valid) goto deoptimize; 
diff --git a/Python/optimizer.c b/Python/optimizer.c index 9f5b22bbdc2dbf..78780af909421e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -735,7 +735,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) } count++; int opcode = buffer[i].opcode; - if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _JUMP_ABSOLUTE) { continue; } /* All other micro-ops fall through, so i+1 is reachable */ @@ -789,6 +789,15 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) dest--; } assert(dest == -1); + // Rewrite backward jumps + if (executor->trace[length-1].opcode == _JUMP_ABSOLUTE) { + for (int end = length - 1; end > 0; end--) { + if (executor->trace[end].opcode == _JUMP_ABSOLUTE_HEADER) { + executor->trace[length-1].oparg = end; + break; + } + } + } _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 78c8f39f79de4a..a4cea781d3ca94 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1269,18 +1269,22 @@ uop_abstract_interpret( // If we end in a loop, and we have a lot of space left, unroll the loop for added type stability // https://en.wikipedia.org/wiki/Loop_unrolling - if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && + if (curr->opcode == _JUMP_TO_TOP && ((ctx->emitter.curr_i * 2) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { did_loop_peel = true; loop_peel_target = ctx->emitter.curr_i; + _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx->emitter.curr_i), 0, 0}; + if (emit_i(&ctx->emitter, jump_header) < 0) { + goto error; + } DPRINTF(2, "loop_peeling!\n"); goto loop_peeling; } else { if (did_loop_peel) { assert(curr->opcode == _JUMP_TO_TOP); - _PyUOpInstruction jump_rel = {_JUMP_ABSOLUTE, (ctx->emitter.curr_i), 0, 0}; - if (emit_i(&ctx->emitter, jump_rel) < 
0) { + _PyUOpInstruction jump_abs = {_JUMP_ABSOLUTE, (ctx->emitter.curr_i), 0, 0}; + if (emit_i(&ctx->emitter, jump_abs) < 0) { goto error; } } else { From 9692146f0dd5fde4c2dddb7e13c58afe13bf4276 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:31:10 +0800 Subject: [PATCH 050/111] peel only a single loops --- Lib/test/test_capi/test_opt.py | 24 ++++++++++++++++++++++++ Python/optimizer_analysis.c | 2 +- Python/pylifecycle.c | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index ec5858a7aff605..383f0e99334a1a 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -845,6 +845,30 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertNotIn("_BINARY_OP_ADD_INT", uops) + def test_loop_peeling(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) + iter_next_count = [opname for opname, _, _ in ex if opname == "_ITER_NEXT_RANGE"] + self.assertGreaterEqual(len(iter_next_count), 2) + def test_truncated_zipfile(self): import io import zipfile diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a4cea781d3ca94..23e793f2b19342 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1269,7 +1269,7 @@ uop_abstract_interpret( // If we end in a loop, and we have a lot of space left, unroll the loop for added type stability // https://en.wikipedia.org/wiki/Loop_unrolling - if (curr->opcode 
== _JUMP_TO_TOP && + if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && ((ctx->emitter.curr_i * 2) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { did_loop_peel = true; loop_peel_target = ctx->emitter.curr_i; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index f314710c773896..c7666c7b142d95 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1231,7 +1231,7 @@ init_interp_main(PyThreadState *tstate) if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } - // enabled = 1; // TEMPORARY: always enable + enabled = 1; // TEMPORARY: always enable if (enabled) { PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer(); if (opt == NULL) { From fe648e21e156d233a5f584ebb37d97915013684e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:34:34 +0800 Subject: [PATCH 051/111] reorder to fix macos --- Python/optimizer_analysis.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 23e793f2b19342..32ee6591281866 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1228,11 +1228,13 @@ uop_abstract_interpret( if (ctx == NULL) { goto error; } + _PyUOpInstruction *curr = NULL; + _PyUOpInstruction *end = NULL; + AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; loop_peeling: - _PyUOpInstruction *curr = trace; - _PyUOpInstruction *end = trace + trace_len; - AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; + curr = trace; + end = trace + trace_len; bool first_impure = true; while (curr < end && !op_is_end(curr->opcode)) { From 2dc0d0c99f355b2c635b2c77e2c058ba6bda9d23 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:38:06 +0800 Subject: [PATCH 052/111] please fix mac --- Python/optimizer_analysis.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 32ee6591281866..b4ce3ca70f822e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1231,12 +1231,14 @@ uop_abstract_interpret( _PyUOpInstruction *curr = NULL; _PyUOpInstruction *end = NULL; AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; + bool first_impure = true; + int res = 0; loop_peeling: curr = trace; end = trace + trace_len; - - bool first_impure = true; + first_impure = true; + ; while (curr < end && !op_is_end(curr->opcode)) { if (!op_is_pure(curr->opcode) && @@ -1297,7 +1299,7 @@ uop_abstract_interpret( } - int res = ctx->emitter.curr_i; + res = ctx->emitter.curr_i; Py_DECREF(ctx); return res; From b132a87c298e714af58fad853ea6903d93704da5 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:46:58 +0800 Subject: [PATCH 053/111] fix test --- Lib/test/test_capi/test_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 383f0e99334a1a..d21ec13b2f4c60 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -344,7 +344,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_JUMP_TO_TOP", uops) + self.assertIn("_JUMP_ABSOLUTE", uops) def test_jump_forward(self): def testfunc(n): From 8a726e01109fd5cb3ffa1d82db6b6f1630064b9e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 02:16:53 +0800 Subject: [PATCH 054/111] slightly reduce dispatch overhead --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 78780af909421e..60c3bda4207aa2 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -793,7 +793,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, 
_PyBloomFilter *dependencies) if (executor->trace[length-1].opcode == _JUMP_ABSOLUTE) { for (int end = length - 1; end > 0; end--) { if (executor->trace[end].opcode == _JUMP_ABSOLUTE_HEADER) { - executor->trace[length-1].oparg = end; + executor->trace[length-1].oparg = end + 1; break; } } From 0d9df64f8d9f4eb871962985bb26a0ceb5b46bb5 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 02:21:26 +0800 Subject: [PATCH 055/111] fixx off-by-one error --- Python/optimizer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 60c3bda4207aa2..09fa85bc0fecff 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -791,12 +791,15 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) assert(dest == -1); // Rewrite backward jumps if (executor->trace[length-1].opcode == _JUMP_ABSOLUTE) { - for (int end = length - 1; end > 0; end--) { + bool found = false; + for (int end = length - 1; end >= 0; end--) { if (executor->trace[end].opcode == _JUMP_ABSOLUTE_HEADER) { executor->trace[length-1].oparg = end + 1; + found = true; break; } } + assert(found); } _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG From a72d6effab9bf2a83ef45d7fb7c328ab2291b8d3 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 08:58:27 +0800 Subject: [PATCH 056/111] peel less aggressively, clear peepholer --- Python/optimizer_analysis.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b4ce3ca70f822e..16b167cf541706 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1218,7 +1218,6 @@ uop_abstract_interpret( } #endif bool did_loop_peel = false; - int loop_peel_target = 0; _Py_UOpsAbstractInterpContext *ctx = NULL; @@ -1271,12 +1270,11 @@ uop_abstract_interpret( 
assert(op_is_end(curr->opcode)); - // If we end in a loop, and we have a lot of space left, unroll the loop for added type stability - // https://en.wikipedia.org/wiki/Loop_unrolling + // If we end in a loop, and we have a lot of space left, peel the loop for added type stability + // https://en.wikipedia.org/wiki/Loop_splitting if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && - ((ctx->emitter.curr_i * 2) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { + ((ctx->emitter.curr_i * 3) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { did_loop_peel = true; - loop_peel_target = ctx->emitter.curr_i; _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx->emitter.curr_i), 0, 0}; if (emit_i(&ctx->emitter, jump_header) < 0) { goto error; @@ -1382,7 +1380,7 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) int oparg = curr->oparg; switch(curr->opcode) { case _SHRINK_STACK: { - // If all that precedes a _SHRINK_STACK is a bunch of LOAD_FAST, + // If all that precedes a _SHRINK_STACK is a bunch of loads, // then we can safely eliminate that without side effects. 
int load_count = 0; _PyUOpInstruction *back = curr-1; @@ -1398,6 +1396,10 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) load_count = 0; while(load_count < oparg) { load_count += op_is_load(back->opcode); + if (back->opcode == _LOAD_CONST_INLINE) { + PyObject *const_val = (PyObject *)back->operand; + Py_CLEAR(const_val); + } back->opcode = NOP; back--; } From 959911311a22f3f0b63760b73e6620361ed8a460 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 12:26:25 +0800 Subject: [PATCH 057/111] undo loop peeling --- Include/internal/pycore_uop_ids.h | 14 +++---- Include/internal/pycore_uop_metadata.h | 4 -- Lib/test/test_capi/test_opt.py | 34 +++-------------- Python/abstract_interp_cases.c.h | 8 ---- Python/bytecodes.c | 8 ---- Python/executor_cases.c.h | 11 ------ Python/optimizer.c | 17 +-------- Python/optimizer_analysis.c | 51 ++++---------------------- 8 files changed, 21 insertions(+), 126 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 385a85881c2c48..ff09dfe5fa4d32 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -229,14 +229,12 @@ extern "C" { #define _GUARD_IS_NOT_NONE_POP 376 #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 -#define _JUMP_ABSOLUTE 379 -#define _JUMP_ABSOLUTE_HEADER 380 -#define _CHECK_VALIDITY 381 -#define _LOAD_CONST_INLINE 382 -#define _LOAD_CONST_INLINE_BORROW 383 -#define _INTERNAL_INCREMENT_OPT_COUNTER 384 -#define _SHRINK_STACK 385 -#define MAX_UOP_ID 385 +#define _CHECK_VALIDITY 379 +#define _LOAD_CONST_INLINE 380 +#define _LOAD_CONST_INLINE_BORROW 381 +#define _INTERNAL_INCREMENT_OPT_COUNTER 382 +#define _SHRINK_STACK 383 +#define MAX_UOP_ID 383 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index bd2a3ec925a68b..740d2ed4d19a81 100644 --- a/Include/internal/pycore_uop_metadata.h +++ 
b/Include/internal/pycore_uop_metadata.h @@ -201,8 +201,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG | HAS_SPECIAL_OPT_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, - [_JUMP_ABSOLUTE] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG, - [_JUMP_ABSOLUTE_HEADER] = 0, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE] = 0, [_LOAD_CONST_INLINE_BORROW] = 0, @@ -316,8 +314,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", - [_JUMP_ABSOLUTE] = "_JUMP_ABSOLUTE", - [_JUMP_ABSOLUTE_HEADER] = "_JUMP_ABSOLUTE_HEADER", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_LIST_APPEND] = "_LIST_APPEND", [_LIST_EXTEND] = "_LIST_EXTEND", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index d21ec13b2f4c60..49bcc4da53f03c 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -344,7 +344,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_JUMP_ABSOLUTE", uops) + self.assertIn("_JUMP_TO_TOP", uops) def test_jump_forward(self): def testfunc(n): @@ -607,7 +607,7 @@ def testfunc(loops): self.assertEqual(res, 63) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] - self.assertGreaterEqual(len(binop_count), 3) + self.assertEqual(len(binop_count), 3) self.assertEqual(len(guard_both_int_count), 1) def test_int_impure_region(self): @@ -629,7 +629,7 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertGreaterEqual(len(binop_count), 3) + self.assertEqual(len(binop_count), 3) def 
test_int_impure_region_attr(self): class A: @@ -652,7 +652,7 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertGreaterEqual(len(binop_count), 3) + self.assertEqual(len(binop_count), 3) def test_call_constant_propagate_past_impure(self): def testfunc(n): @@ -691,7 +691,7 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertGreaterEqual(len(binop_count), 11) + self.assertEqual(len(binop_count), 11) def test_call_py_exact_args(self): def testfunc(n): @@ -845,30 +845,6 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertNotIn("_BINARY_OP_ADD_INT", uops) - def test_loop_peeling(self): - def testfunc(loops): - num = 0 - for _ in range(loops): - x = 0 - y = 1 - a = x + y - return 1 - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(64) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 1) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 0) - uops = {opname for opname, _, _ in ex} - self.assertNotIn("_SHRINK_STACK", uops) - iter_next_count = [opname for opname, _, _ in ex if opname == "_ITER_NEXT_RANGE"] - self.assertGreaterEqual(len(iter_next_count), 2) - def test_truncated_zipfile(self): import io import zipfile diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 8943cc5fec68f4..b567559fe0953f 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1941,14 +1941,6 @@ break; } - case _JUMP_ABSOLUTE: { - break; - } - - case _JUMP_ABSOLUTE_HEADER: { - break; - } - case _LOAD_CONST_INLINE: { _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); diff --git 
a/Python/bytecodes.c b/Python/bytecodes.c index 2de308720fb163..043d83bb06d3d0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4065,14 +4065,6 @@ dummy_func( DEOPT_IF(1); } - op(_JUMP_ABSOLUTE, (--)) { - next_uop = current_executor->trace + oparg; - CHECK_EVAL_BREAKER(); - } - - op(_JUMP_ABSOLUTE_HEADER, (--)) { - } - op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->vm_data.valid); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ef4542b48bee98..10568aa2615346 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3384,17 +3384,6 @@ break; } - case _JUMP_ABSOLUTE: { - oparg = CURRENT_OPARG(); - next_uop = current_executor->trace + oparg; - CHECK_EVAL_BREAKER(); - break; - } - - case _JUMP_ABSOLUTE_HEADER: { - break; - } - case _CHECK_VALIDITY: { TIER_TWO_ONLY if (!current_executor->vm_data.valid) goto deoptimize; diff --git a/Python/optimizer.c b/Python/optimizer.c index 09fa85bc0fecff..044a47dda4680c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -735,7 +735,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) } count++; int opcode = buffer[i].opcode; - if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _JUMP_ABSOLUTE) { + if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { continue; } /* All other micro-ops fall through, so i+1 is reachable */ @@ -776,8 +776,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) executor->trace[dest] = buffer[i]; int opcode = buffer[i].opcode; if (opcode == _POP_JUMP_IF_FALSE || - opcode == _POP_JUMP_IF_TRUE || - opcode == _JUMP_ABSOLUTE) + opcode == _POP_JUMP_IF_TRUE) { /* The oparg of the target will already have been set to its new offset */ int oparg = executor->trace[dest].oparg; @@ -789,18 +788,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) dest--; } assert(dest == -1); - // Rewrite backward jumps - if (executor->trace[length-1].opcode == 
_JUMP_ABSOLUTE) { - bool found = false; - for (int end = length - 1; end >= 0; end--) { - if (executor->trace[end].opcode == _JUMP_ABSOLUTE_HEADER) { - executor->trace[length-1].oparg = end + 1; - found = true; - break; - } - } - assert(found); - } _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 16b167cf541706..6e9bd142f9b3bc 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1217,7 +1217,6 @@ uop_abstract_interpret( lltrace = *uop_debug - '0'; // TODO: Parse an int and all that } #endif - bool did_loop_peel = false; _Py_UOpsAbstractInterpContext *ctx = NULL; @@ -1227,17 +1226,12 @@ uop_abstract_interpret( if (ctx == NULL) { goto error; } - _PyUOpInstruction *curr = NULL; - _PyUOpInstruction *end = NULL; + + _PyUOpInstruction *curr = trace; + _PyUOpInstruction *end = trace + trace_len; AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; - bool first_impure = true; - int res = 0; -loop_peeling: - curr = trace; - end = trace + trace_len; - first_impure = true; - ; + bool first_impure = true; while (curr < end && !op_is_end(curr->opcode)) { if (!op_is_pure(curr->opcode) && @@ -1269,38 +1263,13 @@ uop_abstract_interpret( } assert(op_is_end(curr->opcode)); - - // If we end in a loop, and we have a lot of space left, peel the loop for added type stability - // https://en.wikipedia.org/wiki/Loop_splitting - if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && - ((ctx->emitter.curr_i * 3) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { - did_loop_peel = true; - _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx->emitter.curr_i), 0, 0}; - if (emit_i(&ctx->emitter, jump_header) < 0) { - goto error; - } - DPRINTF(2, "loop_peeling!\n"); - goto loop_peeling; - } - else { - if (did_loop_peel) { - assert(curr->opcode == _JUMP_TO_TOP); - _PyUOpInstruction jump_abs = 
{_JUMP_ABSOLUTE, (ctx->emitter.curr_i), 0, 0}; - if (emit_i(&ctx->emitter, jump_abs) < 0) { - goto error; - } - } else { - if (emit_i(&ctx->emitter, *curr) < 0) { - goto error; - } - } + if (emit_i(&ctx->emitter, *curr) < 0) { + goto error; } - - res = ctx->emitter.curr_i; Py_DECREF(ctx); - return res; + return (int)(curr - trace); error: Py_XDECREF(ctx); @@ -1380,7 +1349,7 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) int oparg = curr->oparg; switch(curr->opcode) { case _SHRINK_STACK: { - // If all that precedes a _SHRINK_STACK is a bunch of loads, + // If all that precedes a _SHRINK_STACK is a bunch of LOAD_FAST, // then we can safely eliminate that without side effects. int load_count = 0; _PyUOpInstruction *back = curr-1; @@ -1396,10 +1365,6 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) load_count = 0; while(load_count < oparg) { load_count += op_is_load(back->opcode); - if (back->opcode == _LOAD_CONST_INLINE) { - PyObject *const_val = (PyObject *)back->operand; - Py_CLEAR(const_val); - } back->opcode = NOP; back--; } From 7de4b375fbaf6780e5c8bb41e0d6c039aeb21fd0 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 26 Jan 2024 14:04:01 +0800 Subject: [PATCH 058/111] Revert "undo loop peeling" This reverts commit 959911311a22f3f0b63760b73e6620361ed8a460. 
--- Include/internal/pycore_uop_ids.h | 14 ++++--- Include/internal/pycore_uop_metadata.h | 4 ++ Lib/test/test_capi/test_opt.py | 34 ++++++++++++++--- Python/abstract_interp_cases.c.h | 8 ++++ Python/bytecodes.c | 8 ++++ Python/executor_cases.c.h | 11 ++++++ Python/optimizer.c | 17 ++++++++- Python/optimizer_analysis.c | 51 ++++++++++++++++++++++---- 8 files changed, 126 insertions(+), 21 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index ff09dfe5fa4d32..385a85881c2c48 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -229,12 +229,14 @@ extern "C" { #define _GUARD_IS_NOT_NONE_POP 376 #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 -#define _CHECK_VALIDITY 379 -#define _LOAD_CONST_INLINE 380 -#define _LOAD_CONST_INLINE_BORROW 381 -#define _INTERNAL_INCREMENT_OPT_COUNTER 382 -#define _SHRINK_STACK 383 -#define MAX_UOP_ID 383 +#define _JUMP_ABSOLUTE 379 +#define _JUMP_ABSOLUTE_HEADER 380 +#define _CHECK_VALIDITY 381 +#define _LOAD_CONST_INLINE 382 +#define _LOAD_CONST_INLINE_BORROW 383 +#define _INTERNAL_INCREMENT_OPT_COUNTER 384 +#define _SHRINK_STACK 385 +#define MAX_UOP_ID 385 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 740d2ed4d19a81..bd2a3ec925a68b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -201,6 +201,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG | HAS_SPECIAL_OPT_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, + [_JUMP_ABSOLUTE] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG, + [_JUMP_ABSOLUTE_HEADER] = 0, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE] = 0, [_LOAD_CONST_INLINE_BORROW] = 0, @@ -314,6 +316,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST", 
[_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", + [_JUMP_ABSOLUTE] = "_JUMP_ABSOLUTE", + [_JUMP_ABSOLUTE_HEADER] = "_JUMP_ABSOLUTE_HEADER", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_LIST_APPEND] = "_LIST_APPEND", [_LIST_EXTEND] = "_LIST_EXTEND", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 49bcc4da53f03c..d21ec13b2f4c60 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -344,7 +344,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_JUMP_TO_TOP", uops) + self.assertIn("_JUMP_ABSOLUTE", uops) def test_jump_forward(self): def testfunc(n): @@ -607,7 +607,7 @@ def testfunc(loops): self.assertEqual(res, 63) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] - self.assertEqual(len(binop_count), 3) + self.assertGreaterEqual(len(binop_count), 3) self.assertEqual(len(guard_both_int_count), 1) def test_int_impure_region(self): @@ -629,7 +629,7 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) + self.assertGreaterEqual(len(binop_count), 3) def test_int_impure_region_attr(self): class A: @@ -652,7 +652,7 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 3) + self.assertGreaterEqual(len(binop_count), 3) def test_call_constant_propagate_past_impure(self): def testfunc(n): @@ -691,7 +691,7 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 11) 
+ self.assertGreaterEqual(len(binop_count), 11) def test_call_py_exact_args(self): def testfunc(n): @@ -845,6 +845,30 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertNotIn("_BINARY_OP_ADD_INT", uops) + def test_loop_peeling(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) + iter_next_count = [opname for opname, _, _ in ex if opname == "_ITER_NEXT_RANGE"] + self.assertGreaterEqual(len(iter_next_count), 2) + def test_truncated_zipfile(self): import io import zipfile diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index b567559fe0953f..8943cc5fec68f4 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1941,6 +1941,14 @@ break; } + case _JUMP_ABSOLUTE: { + break; + } + + case _JUMP_ABSOLUTE_HEADER: { + break; + } + case _LOAD_CONST_INLINE: { _Py_UOpsSymbolicValue *__value_; __value_ = sym_init_unknown(ctx); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 043d83bb06d3d0..2de308720fb163 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4065,6 +4065,14 @@ dummy_func( DEOPT_IF(1); } + op(_JUMP_ABSOLUTE, (--)) { + next_uop = current_executor->trace + oparg; + CHECK_EVAL_BREAKER(); + } + + op(_JUMP_ABSOLUTE_HEADER, (--)) { + } + op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->vm_data.valid); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 10568aa2615346..ef4542b48bee98 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3384,6 +3384,17 
@@ break; } + case _JUMP_ABSOLUTE: { + oparg = CURRENT_OPARG(); + next_uop = current_executor->trace + oparg; + CHECK_EVAL_BREAKER(); + break; + } + + case _JUMP_ABSOLUTE_HEADER: { + break; + } + case _CHECK_VALIDITY: { TIER_TWO_ONLY if (!current_executor->vm_data.valid) goto deoptimize; diff --git a/Python/optimizer.c b/Python/optimizer.c index 044a47dda4680c..09fa85bc0fecff 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -735,7 +735,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) } count++; int opcode = buffer[i].opcode; - if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _JUMP_ABSOLUTE) { continue; } /* All other micro-ops fall through, so i+1 is reachable */ @@ -776,7 +776,8 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) executor->trace[dest] = buffer[i]; int opcode = buffer[i].opcode; if (opcode == _POP_JUMP_IF_FALSE || - opcode == _POP_JUMP_IF_TRUE) + opcode == _POP_JUMP_IF_TRUE || + opcode == _JUMP_ABSOLUTE) { /* The oparg of the target will already have been set to its new offset */ int oparg = executor->trace[dest].oparg; @@ -788,6 +789,18 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) dest--; } assert(dest == -1); + // Rewrite backward jumps + if (executor->trace[length-1].opcode == _JUMP_ABSOLUTE) { + bool found = false; + for (int end = length - 1; end >= 0; end--) { + if (executor->trace[end].opcode == _JUMP_ABSOLUTE_HEADER) { + executor->trace[length-1].oparg = end + 1; + found = true; + break; + } + } + assert(found); + } _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6e9bd142f9b3bc..16b167cf541706 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1217,6 +1217,7 @@ uop_abstract_interpret( lltrace = *uop_debug - '0'; 
// TODO: Parse an int and all that } #endif + bool did_loop_peel = false; _Py_UOpsAbstractInterpContext *ctx = NULL; @@ -1226,12 +1227,17 @@ uop_abstract_interpret( if (ctx == NULL) { goto error; } - - _PyUOpInstruction *curr = trace; - _PyUOpInstruction *end = trace + trace_len; + _PyUOpInstruction *curr = NULL; + _PyUOpInstruction *end = NULL; AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; - bool first_impure = true; + int res = 0; + +loop_peeling: + curr = trace; + end = trace + trace_len; + first_impure = true; + ; while (curr < end && !op_is_end(curr->opcode)) { if (!op_is_pure(curr->opcode) && @@ -1263,13 +1269,38 @@ uop_abstract_interpret( } assert(op_is_end(curr->opcode)); - if (emit_i(&ctx->emitter, *curr) < 0) { - goto error; + + // If we end in a loop, and we have a lot of space left, peel the loop for added type stability + // https://en.wikipedia.org/wiki/Loop_splitting + if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && + ((ctx->emitter.curr_i * 3) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { + did_loop_peel = true; + _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx->emitter.curr_i), 0, 0}; + if (emit_i(&ctx->emitter, jump_header) < 0) { + goto error; + } + DPRINTF(2, "loop_peeling!\n"); + goto loop_peeling; + } + else { + if (did_loop_peel) { + assert(curr->opcode == _JUMP_TO_TOP); + _PyUOpInstruction jump_abs = {_JUMP_ABSOLUTE, (ctx->emitter.curr_i), 0, 0}; + if (emit_i(&ctx->emitter, jump_abs) < 0) { + goto error; + } + } else { + if (emit_i(&ctx->emitter, *curr) < 0) { + goto error; + } + } } + + res = ctx->emitter.curr_i; Py_DECREF(ctx); - return (int)(curr - trace); + return res; error: Py_XDECREF(ctx); @@ -1349,7 +1380,7 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) int oparg = curr->oparg; switch(curr->opcode) { case _SHRINK_STACK: { - // If all that precedes a _SHRINK_STACK is a bunch of LOAD_FAST, + // If all that precedes a _SHRINK_STACK is a bunch of loads, // then 
we can safely eliminate that without side effects. int load_count = 0; _PyUOpInstruction *back = curr-1; @@ -1365,6 +1396,10 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) load_count = 0; while(load_count < oparg) { load_count += op_is_load(back->opcode); + if (back->opcode == _LOAD_CONST_INLINE) { + PyObject *const_val = (PyObject *)back->operand; + Py_CLEAR(const_val); + } back->opcode = NOP; back--; } From acc6490861d8c5977cb8504345acd0fbf7c2148c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 27 Jan 2024 00:19:06 +0800 Subject: [PATCH 059/111] add more memory for everything --- Lib/test/test_capi/test_opt.py | 1 + Python/optimizer.c | 2 +- Python/optimizer_analysis.c | 9 +++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index d21ec13b2f4c60..d974d1fab219d5 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -709,6 +709,7 @@ def dummy(x): uops = {opname for opname, _, _ in ex} self.assertIn("_PUSH_FRAME", uops) self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_CHECK_PEP_523", uops) def test_frame_instance_method(self): class A: diff --git a/Python/optimizer.c b/Python/optimizer.c index 09fa85bc0fecff..83b1c773a9f2a8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -19,7 +19,7 @@ // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 512 // This the above + additional working space we need. 
-#define UOP_MAX_TRACE_WORKING_LENGTH UOP_MAX_TRACE_LENGTH * 2 +#define UOP_MAX_TRACE_WORKING_LENGTH (UOP_MAX_TRACE_LENGTH * 4) #define MAX_EXECUTORS_SIZE 256 diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 16b167cf541706..cc89b7863a931f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -30,9 +30,9 @@ #include #include -#define MAX_ABSTRACT_INTERP_SIZE 2048 +#define MAX_ABSTRACT_INTERP_SIZE 4096 -#define OVERALLOCATE_FACTOR 3 +#define OVERALLOCATE_FACTOR 8 #define PEEPHOLE_MAX_ATTEMPTS 5 @@ -1446,10 +1446,11 @@ _Py_uop_analyze_and_optimize( goto error; } + bool done = false; for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && - !peephole_optimizations(temp_writebuffer, new_trace_len); + !done; peephole_attempts++) { - + done = peephole_optimizations(temp_writebuffer, new_trace_len); } remove_unneeded_uops(temp_writebuffer, new_trace_len); From cf59bbadf533531a4f13426a888668b052395aed Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 27 Jan 2024 00:43:24 +0800 Subject: [PATCH 060/111] peephole on failure --- Python/optimizer_analysis.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index cc89b7863a931f..7af3ca7f2ad537 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1431,6 +1431,7 @@ _Py_uop_analyze_and_optimize( { _PyUOpInstruction *temp_writebuffer = NULL; bool err_occurred = false; + bool done = false; temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size); if (temp_writebuffer == NULL) { @@ -1446,7 +1447,6 @@ _Py_uop_analyze_and_optimize( goto error; } - bool done = false; for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && !done; peephole_attempts++) { @@ -1476,6 +1476,11 @@ _Py_uop_analyze_and_optimize( // to fetch a function version because the function got deleted. 
err_occurred = PyErr_Occurred(); PyMem_Free(temp_writebuffer); + for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && + !done; + peephole_attempts++) { + done = peephole_optimizations(buffer, buffer_size); + } remove_unneeded_uops(buffer, buffer_size); return err_occurred ? -1 : 0; } \ No newline at end of file From 41882cec2671ebb86177b68f93654d35eb18543d Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 27 Jan 2024 11:50:31 +0800 Subject: [PATCH 061/111] add stats collection --- Include/cpython/pystats.h | 4 ++++ Python/optimizer_analysis.c | 6 ++++++ Python/specialize.c | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index ba67eefef3e37a..16c5804a62e72e 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -120,6 +120,10 @@ typedef struct _optimization_stats { uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE]; uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; + uint64_t optimizer_attempts; + uint64_t optimizer_successes; + uint64_t optimizer_failure_reason_null_function; + uint64_t optimizer_failure_reason_no_memory; } OptimizationStats; typedef struct _stats { diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7af3ca7f2ad537..c77249c9e44995 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -248,6 +248,7 @@ ir_frame_push_info(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *push_f DPRINTF(3, "ir_frame_push_info\n"); #endif if (ctx->frame_info.curr_number >= ctx->frame_info.max_number) { + OPT_STAT_INC(optimizer_failure_reason_no_memory); DPRINTF(1, "ir_frame_push_info: ran out of space \n"); return NULL; } @@ -531,6 +532,7 @@ char *uop_debug = Py_GETENV(DEBUG_ENV); uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); PyFunctionObject *func = 
_PyFunction_LookupByVersion((uint32_t)func_version); if (func == NULL) { + OPT_STAT_INC(optimizer_failure_reason_null_function); DPRINTF(1, "error: _PUSH_FRAME cannot find func version\n"); return NULL; } @@ -607,12 +609,14 @@ _Py_UOpsSymbolicValue_New(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsSymbolicValue *self = (_Py_UOpsSymbolicValue *)ctx->s_arena.curr_available; ctx->s_arena.curr_available += sizeof(_Py_UOpsSymbolicValue) + sizeof(_Py_UOpsSymbolicValue *); if (ctx->s_arena.curr_available >= ctx->s_arena.end) { + OPT_STAT_INC(optimizer_failure_reason_no_memory); DPRINTF(1, "out of space for symbolic expression\n"); return NULL; } _Py_UOpsSymType *ty = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number]; if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) { + OPT_STAT_INC(optimizer_failure_reason_no_memory); DPRINTF(1, "out of space for symbolic expression type\n"); return NULL; } @@ -1429,6 +1433,7 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { + OPT_STAT_INC(optimizer_attempts); _PyUOpInstruction *temp_writebuffer = NULL; bool err_occurred = false; bool done = false; @@ -1469,6 +1474,7 @@ _Py_uop_analyze_and_optimize( after++; } + OPT_STAT_INC(optimizer_successes); return 0; error: // The only valid error we can raise is MemoryError. 
diff --git a/Python/specialize.c b/Python/specialize.c index 13e0440dd9dd0d..39b0ad5e881179 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -240,6 +240,11 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) print_histogram(out, "Trace run length", stats->trace_run_length_hist); print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist); + fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts); + fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes); + fprintf(out, "Optimization optimizer failure null function: %" PRIu64 "\n", stats->optimizer_failure_reason_null_function); + fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", stats->optimizer_failure_reason_no_memory); + const char* const* names; for (int i = 0; i < 512; i++) { if (i < 256) { From 797bc1efb0df1474fdfc407e4f3a12264ba2d1d1 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 27 Jan 2024 12:39:40 +0800 Subject: [PATCH 062/111] Revert "add more memory for everything" This reverts commit acc6490861d8c5977cb8504345acd0fbf7c2148c. partially --- Python/optimizer.c | 2 +- Python/optimizer_analysis.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 83b1c773a9f2a8..09fa85bc0fecff 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -19,7 +19,7 @@ // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 512 // This the above + additional working space we need. 
-#define UOP_MAX_TRACE_WORKING_LENGTH (UOP_MAX_TRACE_LENGTH * 4) +#define UOP_MAX_TRACE_WORKING_LENGTH UOP_MAX_TRACE_LENGTH * 2 #define MAX_EXECUTORS_SIZE 256 diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index c77249c9e44995..d58aeab224e8cd 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -30,9 +30,9 @@ #include #include -#define MAX_ABSTRACT_INTERP_SIZE 4096 +#define MAX_ABSTRACT_INTERP_SIZE 2048 -#define OVERALLOCATE_FACTOR 8 +#define OVERALLOCATE_FACTOR 3 #define PEEPHOLE_MAX_ATTEMPTS 5 @@ -1453,9 +1453,9 @@ _Py_uop_analyze_and_optimize( } for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && - !done; + !peephole_optimizations(temp_writebuffer, new_trace_len); peephole_attempts++) { - done = peephole_optimizations(temp_writebuffer, new_trace_len); + } remove_unneeded_uops(temp_writebuffer, new_trace_len); From 22da280464e0e773cb175883577c41e0b0c98e57 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 27 Jan 2024 20:51:06 +0800 Subject: [PATCH 063/111] remove bad redefinition --- Python/optimizer_analysis.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index d58aeab224e8cd..28df66d5fa32ec 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -951,7 +951,6 @@ uop_abstract_interpret_single_inst( #define CURRENT_OPERAND() (operand) -#define STAT_INC(opname, name) ((void)0) #define TIER_TWO_ONLY ((void)0) int oparg = inst->oparg; From 64fa51ca40b7d76f79c4d5343be869c8825e5f0b Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 28 Jan 2024 11:41:38 +0800 Subject: [PATCH 064/111] Partially address Guido's review --- Include/internal/pycore_uop_metadata.h | 4 +- Python/abstract_interp_cases.c.h | 162 +++++++---- Python/optimizer_analysis.c | 270 +++--------------- .../cases_generator/tier2_abstract_common.py | 2 - 
.../tier2_abstract_generator.py | 30 +- 5 files changed, 159 insertions(+), 309 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index bd2a3ec925a68b..8dc4d10c819e2b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -26,7 +26,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_SPECIAL_OPT_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_POP_TOP] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_POP_TOP] = HAS_PURE_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_END_SEND] = HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -192,7 +192,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG, - [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG, diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 8943cc5fec68f4..fc1ff9659d1d0e 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -18,6 +18,13 @@ /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ + case _POP_TOP: { + _Py_UOpsSymbolicValue *__value_; + __value_ = stack_pointer[-1]; + stack_pointer += -1; + break; + } + case _END_SEND: { _Py_UOpsSymbolicValue *__value_; _Py_UOpsSymbolicValue *__receiver_; @@ -43,7 +50,7 @@ _Py_UOpsSymbolicValue *__res_; __value_ = stack_pointer[-1]; // Constant evaluation - if (is_const(__value_)){ + if (is_const(__value_)) { PyObject *value; PyObject *res; value = get_const(__value_); @@ -53,7 +60,8 @@ if(__res_ == NULL) { 
goto error; } shrink_stack.oparg = 1; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -81,7 +89,8 @@ if (!PyBool_Check(value)) goto error; STAT_INC(TO_BOOL, hit); DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -149,12 +158,14 @@ if (!PyLong_CheckExact(right)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYLONG_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYLONG_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -171,7 +182,7 @@ __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation - if (is_const(__left_) && is_const(__right_)){ + if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; PyObject *res; @@ -185,7 +196,8 @@ if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -205,7 +217,7 @@ __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation - if (is_const(__left_) && is_const(__right_)){ + if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; PyObject *res; @@ -219,7 +231,8 @@ if(__res_ == NULL) { goto error; 
} shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -239,7 +252,7 @@ __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation - if (is_const(__left_) && is_const(__right_)){ + if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; PyObject *res; @@ -253,7 +266,8 @@ if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -281,12 +295,14 @@ if (!PyFloat_CheckExact(right)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYFLOAT_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYFLOAT_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -303,7 +319,7 @@ __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation - if (is_const(__left_) && is_const(__right_)){ + if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; PyObject *res; @@ -318,7 +334,8 @@ if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = 
_Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -338,7 +355,7 @@ __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation - if (is_const(__left_) && is_const(__right_)){ + if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; PyObject *res; @@ -353,7 +370,8 @@ if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -373,7 +391,7 @@ __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation - if (is_const(__left_) && is_const(__right_)){ + if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; PyObject *res; @@ -388,7 +406,8 @@ if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -416,12 +435,14 @@ if (!PyUnicode_CheckExact(right)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYUNICODE_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYUNICODE_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -438,7 +459,7 @@ __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation - if 
(is_const(__left_) && is_const(__right_)){ + if (is_const(__left_) && is_const(__right_)) { PyObject *right; PyObject *left; PyObject *res; @@ -452,7 +473,8 @@ if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } - new_inst.opcode = _NOP;} + new_inst.opcode = _NOP; + } else { __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); if (__res_ == NULL) { goto error; } @@ -956,12 +978,14 @@ if (tp->tp_version_tag != type_version) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -1002,7 +1026,8 @@ if (dict->ma_keys->dk_version != type_version) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1035,7 +1060,8 @@ if (dict == NULL) goto error; assert(PyDict_CheckExact((PyObject *)dict)); DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1081,7 +1107,8 @@ if (((PyTypeObject *)owner)->tp_version_tag != type_version) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1116,12 +1143,14 @@ if (!_PyDictOrValues_IsValues(dorv)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)){ + if 
(sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -1314,7 +1343,8 @@ if (Py_TYPE(iter) != &PyListIter_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1335,7 +1365,8 @@ if (it->it_index >= PyList_GET_SIZE(seq)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1359,7 +1390,8 @@ if (Py_TYPE(iter) != &PyTupleIter_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1380,7 +1412,8 @@ if (it->it_index >= PyTuple_GET_SIZE(seq)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1405,7 +1438,8 @@ if (Py_TYPE(r) != &PyRangeIter_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1424,7 +1458,8 @@ if (r->len <= 0) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1501,12 +1536,14 @@ if (!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { 
// Type propagation @@ -1528,12 +1565,14 @@ if (owner_heap_type->ht_cached_keys->dk_version != keys_version) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -1600,7 +1639,8 @@ if (dict != NULL) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } break; } @@ -1637,12 +1677,14 @@ if (Py_TYPE(callable) != &PyMethod_Type) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__null_, NULL_TYPE, (uint32_t)0)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__null_, NULL_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -1687,12 +1729,14 @@ if (code->co_argcount != oparg + (self_or_null != NULL)) goto error; DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)){ + if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)) { DPRINTF(2, "type propagation eliminated 
guard\n"); - new_inst.opcode = _NOP;break; + new_inst.opcode = _NOP; + break; } else { // Type propagation @@ -1899,6 +1943,16 @@ break; } + case _SWAP: { + _Py_UOpsSymbolicValue *__top_; + _Py_UOpsSymbolicValue *__bottom_; + __top_ = stack_pointer[-1]; + __bottom_ = stack_pointer[-2 - (oparg-2)]; + stack_pointer[-2 - (oparg-2)] = __top_; + stack_pointer[-1] = __bottom_; + break; + } + /* _INSTRUMENTED_INSTRUCTION is not a viable micro-op for tier 2 */ /* _INSTRUMENTED_JUMP_FORWARD is not a viable micro-op for tier 2 */ diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 28df66d5fa32ec..42bfdc46c021f2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -38,8 +38,16 @@ #ifdef Py_DEBUG static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; + static inline int get_lltrace() { + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + return lltrace; + } #define DPRINTF(level, ...) \ - if (lltrace >= (level)) { printf(__VA_ARGS__); } + if (get_lltrace() >= (level)) { printf(__VA_ARGS__); } #else #define DPRINTF(level, ...) #endif @@ -71,6 +79,8 @@ typedef enum { INVALID_TYPE = 31, } _Py_UOpsSymExprTypeEnum; +#define MAX_TYPE_WITH_REFINEMENT PYFUNCTION_TYPE_VERSION_TYPE + static const uint32_t IMMUTABLES = ( 1 << NULL_TYPE | @@ -81,7 +91,6 @@ static const uint32_t IMMUTABLES = 1 << TRUE_CONST ); -#define MAX_TYPE_WITH_REFINEMENT 2 typedef struct { // bitmask of types uint32_t types; @@ -100,13 +109,6 @@ typedef struct _Py_UOpsSymbolicValue { // more optimizations. } _Py_UOpsSymbolicValue; -typedef struct frame_info { - // Only used in codegen for bookkeeping. - struct frame_info *prev_frame_ir; - // Localsplus of this frame. - _Py_UOpsSymbolicValue **my_virtual_localsplus; -} frame_info; - typedef struct _Py_UOpsAbstractFrame { PyObject_HEAD // Strong reference. 
@@ -120,8 +122,6 @@ typedef struct _Py_UOpsAbstractFrame { int stack_len; int locals_len; - frame_info *frame_ir_entry; - _Py_UOpsSymbolicValue **stack_pointer; _Py_UOpsSymbolicValue **stack; _Py_UOpsSymbolicValue **locals; @@ -145,19 +145,6 @@ PyTypeObject _Py_UOpsAbstractFrame_Type = { .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; -typedef struct creating_new_frame { - _Py_UOpsSymbolicValue *func; - _Py_UOpsSymbolicValue *self_or_null; - _Py_UOpsSymbolicValue **args; -} creating_new_frame; - - -typedef struct frame_info_arena { - int curr_number; - int max_number; - frame_info *arena; -} frame_info_arena; - typedef struct sym_arena { char *curr_available; char *end; @@ -183,15 +170,9 @@ typedef struct uops_emitter { // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD - // Stores the information for the upcoming new frame that is about to be created. - // Corresponds to _INIT_CALL_PY_EXACT_ARGS. - creating_new_frame new_frame_sym; // The current "executing" frame. _Py_UOpsAbstractFrame *frame; - // An arena for the frame information. - frame_info_arena frame_info; - // Arena for the symbolic expression themselves. sym_arena s_arena; // Arena for the symbolic expressions' types. @@ -221,7 +202,6 @@ abstractinterp_dealloc(PyObject *o) } PyMem_Free(self->t_arena.arena); PyMem_Free(self->s_arena.arena); - PyMem_Free(self->frame_info.arena); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -235,42 +215,10 @@ PyTypeObject _Py_UOpsAbstractInterpContext_Type = { .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; -// Tags a _PUSH_FRAME with the frame info. 
-static frame_info * -ir_frame_push_info(_Py_UOpsAbstractInterpContext *ctx, _PyUOpInstruction *push_frame) -{ -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - DPRINTF(3, "ir_frame_push_info\n"); -#endif - if (ctx->frame_info.curr_number >= ctx->frame_info.max_number) { - OPT_STAT_INC(optimizer_failure_reason_no_memory); - DPRINTF(1, "ir_frame_push_info: ran out of space \n"); - return NULL; - } - frame_info *entry = &ctx->frame_info.arena[ctx->frame_info.curr_number]; - entry->my_virtual_localsplus = NULL; - entry->prev_frame_ir = NULL; - // root frame - if (push_frame == NULL) { - assert(ctx->frame_info.curr_number == 0); - ctx->frame_info.curr_number++; - return entry; - } - assert(push_frame->opcode == _PUSH_FRAME); - push_frame->operand = (uintptr_t)entry; - ctx->frame_info.curr_number++; - return entry; -} - static inline _Py_UOpsAbstractFrame * frame_new(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts, int stack_len, int locals_len, - int curr_stacklen, frame_info *frame_ir_entry); + int curr_stacklen); static inline int frame_push(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, @@ -295,15 +243,9 @@ abstractinterp_context_new(PyCodeObject *co, _Py_UOpsAbstractInterpContext *self = NULL; char *arena = NULL; _Py_UOpsSymType *t_arena = NULL; - frame_info *frame_info_arena = NULL; Py_ssize_t arena_size = (sizeof(_Py_UOpsSymbolicValue)) * ir_entries * OVERALLOCATE_FACTOR; Py_ssize_t ty_arena_size = (sizeof(_Py_UOpsSymType)) * ir_entries * OVERALLOCATE_FACTOR; - Py_ssize_t frame_info_arena_size = (sizeof(frame_info)) * ir_entries * OVERALLOCATE_FACTOR; - frame_info_arena = PyMem_Malloc(frame_info_arena_size); - if (frame_info_arena == NULL) { - goto error; - } arena = (char *)PyMem_Malloc(arena_size); if (arena == NULL) { @@ -323,17 +265,6 @@ abstractinterp_context_new(PyCodeObject *co, goto 
error; } - // Setup frame info arena - self->frame_info.curr_number = 0; - self->frame_info.arena = frame_info_arena; - self->frame_info.max_number = ir_entries * OVERALLOCATE_FACTOR; - - - frame_info *root_frame = ir_frame_push_info(self, NULL); - if (root_frame == NULL) { - goto error; - } - self->limit = self->localsplus + MAX_ABSTRACT_INTERP_SIZE; self->water_level = self->localsplus; for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { @@ -351,11 +282,8 @@ abstractinterp_context_new(PyCodeObject *co, self->t_arena.ty_max_number = ir_entries * OVERALLOCATE_FACTOR; // Frame setup - self->new_frame_sym.func = NULL; - self->new_frame_sym.args = NULL; - self->new_frame_sym.self_or_null = NULL; - frame = frame_new(self, co->co_consts, stack_len, locals_len, curr_stacklen, root_frame); + frame = frame_new(self, co->co_consts, stack_len, locals_len, curr_stacklen); if (frame == NULL) { goto error; } @@ -368,7 +296,6 @@ abstractinterp_context_new(PyCodeObject *co, } self->frame = frame; assert(frame != NULL); - root_frame->my_virtual_localsplus = self->localsplus; // IR and sym setup self->frequent_syms.push_nulL_sym = NULL; @@ -383,12 +310,10 @@ abstractinterp_context_new(PyCodeObject *co, error: PyMem_Free(arena); PyMem_Free(t_arena); - PyMem_Free(frame_info_arena); if (self != NULL) { // Important so we don't double free them. 
self->t_arena.arena = NULL; self->s_arena.arena = NULL; - self->frame_info.arena = NULL; self->frame = NULL; } Py_XDECREF(self); @@ -482,7 +407,7 @@ frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame static inline _Py_UOpsAbstractFrame * frame_new(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts, int stack_len, int locals_len, - int curr_stacklen, frame_info *frame_ir_entry) + int curr_stacklen) { _Py_UOpsSymbolicValue **sym_consts = create_sym_consts(ctx, co_consts); if (sym_consts == NULL) { @@ -503,7 +428,6 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, frame->prev = NULL; frame->next = NULL; - frame->frame_ir_entry = frame_ir_entry; return frame; } @@ -513,17 +437,8 @@ static inline uint64_t sym_type_get_refinement(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ); static inline PyFunctionObject * -extract_func_from_sym(creating_new_frame *frame_sym) +extract_func_from_sym(_Py_UOpsSymbolicValue *callable_sym) { -#ifdef Py_DEBUG -char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - DPRINTF(3, "extract_func_from_sym\n"); -#endif - _Py_UOpsSymbolicValue *callable_sym = frame_sym->func; assert(callable_sym != NULL); if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { DPRINTF(1, "error: _PUSH_FRAME not function type\n"); @@ -544,16 +459,14 @@ char *uop_debug = Py_GETENV(DEBUG_ENV); static int ctx_frame_push( _Py_UOpsAbstractInterpContext *ctx, - frame_info *frame_ir_entry, PyCodeObject *co, _Py_UOpsSymbolicValue **localsplus_start ) { - assert(frame_ir_entry != NULL); _Py_UOpsAbstractFrame *frame = frame_new(ctx, co->co_consts, co->co_stacksize, co->co_nlocalsplus, - 0, frame_ir_entry); + 0); if (frame == NULL) { return -1; } @@ -569,7 +482,6 @@ ctx_frame_push( ctx->frame->next = frame; ctx->frame = frame; - frame_ir_entry->my_virtual_localsplus = localsplus_start; return 0; } @@ 
-598,14 +510,6 @@ static _Py_UOpsSymbolicValue* _Py_UOpsSymbolicValue_New(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val) { -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - _Py_UOpsSymbolicValue *self = (_Py_UOpsSymbolicValue *)ctx->s_arena.curr_available; ctx->s_arena.curr_available += sizeof(_Py_UOpsSymbolicValue) + sizeof(_Py_UOpsSymbolicValue *); if (ctx->s_arena.curr_available >= ctx->s_arena.end) { @@ -829,13 +733,6 @@ static inline int emit_i(uops_emitter *emitter, _PyUOpInstruction inst) { -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif if (emitter->curr_i < 0) { DPRINTF(2, "out of emission space\n"); return -1; @@ -914,14 +811,6 @@ uop_abstract_interpret_single_inst( _Py_UOpsAbstractInterpContext *ctx ) { -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) #define STACK_SIZE() (ctx->frame->stack_len) #define BASIC_STACKADJ(n) (stack_pointer += n) @@ -970,27 +859,19 @@ uop_abstract_interpret_single_inst( // Note: LOAD_FAST_CHECK is not pure!!! case LOAD_FAST_CHECK: { STACK_GROW(1); - _Py_UOpsSymbolicValue * local = GETLOCAL(oparg); - _Py_UOpsSymbolicValue * new_local = sym_init_unknown(ctx); - if (new_local == NULL) { + _Py_UOpsSymbolicValue *local = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. 
+ if (sym_is_type(local, NULL_TYPE)) { goto error; } - sym_copy_type_number(local, new_local); - PEEK(1) = new_local; + PEEK(1) = local; break; } case LOAD_FAST: { STACK_GROW(1); _Py_UOpsSymbolicValue * local = GETLOCAL(oparg); - // Might be NULL - replace with LOAD_FAST_CHECK if (sym_is_type(local, NULL_TYPE)) { - _Py_UOpsSymbolicValue * new_local = sym_init_unknown(ctx); - if (new_local == NULL) { - goto error; - } - sym_copy_type_number(local, new_local); - PEEK(1) = new_local; - break; + Py_UNREACHABLE(); } // Guaranteed by the CPython bytecode compiler to not be uninitialized. PEEK(1) = GETLOCAL(oparg); @@ -1001,13 +882,6 @@ uop_abstract_interpret_single_inst( case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); PEEK(1) = GETLOCAL(oparg); - ctx->frame->stack_pointer = stack_pointer; - _Py_UOpsSymbolicValue *new_local = sym_init_unknown(ctx); - if (new_local == NULL) { - goto error; - } - sym_set_type(new_local, NULL_TYPE, 0); - GETLOCAL(oparg) = new_local; break; } case LOAD_CONST: { @@ -1027,29 +901,14 @@ uop_abstract_interpret_single_inst( case STORE_FAST_MAYBE_NULL: case STORE_FAST: { _Py_UOpsSymbolicValue *value = PEEK(1); - _Py_UOpsSymbolicValue *new_local = sym_init_unknown(ctx); - if (new_local == NULL) { - goto error; - } - sym_copy_type_number(value, new_local); - GETLOCAL(oparg) = new_local; + GETLOCAL(oparg) = value; STACK_SHRINK(1); break; } case COPY: { _Py_UOpsSymbolicValue *bottom = PEEK(1 + (oparg - 1)); STACK_GROW(1); - _Py_UOpsSymbolicValue *temp = sym_init_unknown(ctx); - if (temp == NULL) { - goto error; - } - PEEK(1) = temp; - sym_copy_type_number(bottom, temp); - break; - } - - case POP_TOP: { - STACK_SHRINK(1); + PEEK(1) = bottom; break; } @@ -1064,61 +923,37 @@ uop_abstract_interpret_single_inst( } case _INIT_CALL_PY_EXACT_ARGS: { - _Py_UOpsSymbolicValue **__args_; - _Py_UOpsSymbolicValue *__self_or_null_; - _Py_UOpsSymbolicValue *__callable_; - _Py_UOpsSymbolicValue *__new_frame_; - __args_ = &stack_pointer[-oparg]; - __self_or_null_ = 
stack_pointer[-1 - oparg]; - __callable_ = stack_pointer[-2 - oparg]; - // Store the frame symbolic to extract information later - assert(ctx->new_frame_sym.func == NULL); - ctx->new_frame_sym.func = __callable_; - ctx->new_frame_sym.self_or_null = __self_or_null_; - ctx->new_frame_sym.args = __args_; - __new_frame_ = _Py_UOpsSymbolicValue_New(ctx, NULL); - if (__new_frame_ == NULL) { - goto error; - } - stack_pointer[-2 - oparg] = (_Py_UOpsSymbolicValue *)__new_frame_; + // Don't put in the new frame. Leave it be so that _PUSH_FRAME + // can extract callable, self_or_null and args later. + // Set stack pointer to the callable. stack_pointer += -1 - oparg; break; } case _PUSH_FRAME: { int argcount = oparg; - // TOS is the new frame. - STACK_SHRINK(1); - ctx->frame->stack_pointer = stack_pointer; - frame_info *frame_ir_entry = ir_frame_push_info(ctx, inst); - if (frame_ir_entry == NULL) { - goto error; - } + // TOS is the new callable, above it self_or_null and args - PyFunctionObject *func = extract_func_from_sym(&ctx->new_frame_sym); + PyFunctionObject *func = extract_func_from_sym(PEEK(1)); if (func == NULL) { goto error; } PyCodeObject *co = (PyCodeObject *)func->func_code; - _Py_UOpsSymbolicValue *self_or_null = ctx->new_frame_sym.self_or_null; + _Py_UOpsSymbolicValue *self_or_null = PEEK(0); assert(self_or_null != NULL); - _Py_UOpsSymbolicValue **args = ctx->new_frame_sym.args; + _Py_UOpsSymbolicValue **args = &PEEK(-1); assert(args != NULL); - ctx->new_frame_sym.func = NULL; - ctx->new_frame_sym.self_or_null = NULL; - ctx->new_frame_sym.args = NULL; // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS - if (!sym_is_type(self_or_null, NULL_TYPE)) { + if (!sym_is_type(self_or_null, NULL_TYPE) && + !sym_is_type(self_or_null, SELF_OR_NULL)) { args--; argcount++; } - if (ctx_frame_push( - ctx, - frame_ir_entry, - co, - ctx->water_level - ) != 0){ + // This is _PUSH_FRAME's stack effect + STACK_SHRINK(1); + ctx->frame->stack_pointer = stack_pointer; + if 
(ctx_frame_push(ctx, co, ctx->water_level) != 0){ goto error; } stack_pointer = ctx->frame->stack_pointer; @@ -1147,34 +982,11 @@ uop_abstract_interpret_single_inst( if (new_retval == NULL) { goto error; } - PEEK(1) = new_retval; sym_copy_type_number(retval, new_retval); + PEEK(1) = new_retval; break; } - case SWAP: { - _Py_UOpsSymbolicValue *top; - _Py_UOpsSymbolicValue *bottom; - top = stack_pointer[-1]; - bottom = stack_pointer[-2 - (oparg-2)]; - assert(oparg >= 2); - - _Py_UOpsSymbolicValue *new_top = sym_init_unknown(ctx); - if (new_top == NULL) { - goto error; - } - sym_copy_type_number(top, new_top); - - _Py_UOpsSymbolicValue *new_bottom = sym_init_unknown(ctx); - if (new_bottom == NULL) { - goto error; - } - sym_copy_type_number(bottom, new_bottom); - - stack_pointer[-2 - (oparg-2)] = new_top; - stack_pointer[-1] = new_bottom; - break; - } case _SET_IP: case _CHECK_VALIDITY: case _SAVE_RETURN_OFFSET: @@ -1212,14 +1024,6 @@ uop_abstract_interpret( int curr_stacklen ) { - -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif bool did_loop_peel = false; _Py_UOpsAbstractInterpContext *ctx = NULL; diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py index ed7c800575f5b1..83baba1d198f84 100644 --- a/Tools/cases_generator/tier2_abstract_common.py +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -8,9 +8,7 @@ "STORE_FAST", "STORE_FAST_MAYBE_NULL", "COPY", - "POP_TOP", "PUSH_NULL", - "SWAP", # Frame stuff "_PUSH_FRAME", "_POP_FRAME", diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index c30691893aec86..d51cb5df4addde 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -206,7 +206,7 @@ def 
_write_body_abstract_interp_pure_uop( mangled_uop.stack.inputs ) - if uop.name == "_NOP": + if uop.name in {"_NOP", "_SWAP", "_POP_TOP"}: return assert ( @@ -232,7 +232,7 @@ def _write_body_abstract_interp_pure_uop( ] ) - out.emit(f"if ({predicates or 0}){{\n") + out.emit(f"if ({predicates or 0}) {{\n") declare_variables(uop, out, default_type="PyObject *") for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): out.emit(f"{var.name} = get_const({mangled_var.name});\n") @@ -244,7 +244,7 @@ def _write_body_abstract_interp_pure_uop( out.emit(f"if({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") out.emit(f"shrink_stack.oparg = {len(uop.stack.inputs)};\n") out.emit(f" if (emit_const(&ctx->emitter, {const_val}, shrink_stack) < 0) {{ goto error; }}\n") - out.emit("new_inst.opcode = _NOP;") + out.emit("new_inst.opcode = _NOP;\n") out.emit("}\n") out.emit("else {\n") sym = new_sym(None) @@ -304,7 +304,7 @@ def _write_body_abstract_interp_guard_uop( out.emit("\n") # Guard elimination out.emit('DPRINTF(3, "const eliminated guard\\n");\n') - out.emit("new_inst.opcode = _NOP;") + out.emit("new_inst.opcode = _NOP;\n") out.emit("break;\n") out.emit("}\n") @@ -339,9 +339,9 @@ def _write_body_abstract_interp_guard_uop( ) out.emit("// Type guard elimination\n") - out.emit(f"if ({' && '.join(predicates)}){{\n") + out.emit(f"if ({' && '.join(predicates)}) {{\n") out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') - out.emit("new_inst.opcode = _NOP;") + out.emit("new_inst.opcode = _NOP;\n") out.emit("break;\n") out.emit("}\n") # Else we need the guard @@ -362,20 +362,14 @@ def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) - and mangled_uop.name in NO_CONST_OR_TYPE_EVALUATE ): for var in reversed(mangled_uop.stack.inputs): - old_var_name = var.name - # code smell, but basically impure ops don't use any of their inputs - if is_impure: - var.name = "unused" - out.emit(stack.pop(var)) - var.name = old_var_name + 
definition = stack.pop(var) + if not is_impure: + out.emit(definition) if not mangled_uop.properties.stores_sp: for i, var in enumerate(mangled_uop.stack.outputs): - old_var_name = var.name - # Code smell, but impure variadic ops don't use their outputs either. - if is_impure and var.size != "1": - var.name = "unused" - out.emit(stack.push(var)) - var.name = old_var_name + definition = stack.push(var) + if not (is_impure and var.size != "1"): + out.emit(definition) if uop.properties.pure: _write_body_abstract_interp_pure_uop(mangled_uop, uop, out, stack) elif uop.properties.guard: From 262f978ce210f7686ad71955b3b7dccfed2c72de Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 28 Jan 2024 11:51:22 +0800 Subject: [PATCH 065/111] remove sym_copy_type_number --- Python/optimizer_analysis.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 42bfdc46c021f2..dcb5f72662fe4d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -547,12 +547,6 @@ sym_set_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t r } } -static void -sym_copy_type_number(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicValue *to_sym) -{ - to_sym->ty_number = from_sym->ty_number; -} - // Note: for this, to_sym MUST point to brand new sym. static void sym_copy_immutable_type_info(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicValue *to_sym) @@ -960,7 +954,7 @@ uop_abstract_interpret_single_inst( // Cannot determine statically, so we can't propagate types. if (!sym_is_type(self_or_null, SELF_OR_NULL)) { for (int i = 0; i < argcount; i++) { - sym_copy_type_number(args[i], ctx->frame->locals[i]); + ctx->frame->locals[i] = args[i]; } } break; @@ -978,12 +972,7 @@ uop_abstract_interpret_single_inst( stack_pointer = ctx->frame->stack_pointer; // Push retval into new frame. 
STACK_GROW(1); - _Py_UOpsSymbolicValue *new_retval = sym_init_unknown(ctx); - if (new_retval == NULL) { - goto error; - } - sym_copy_type_number(retval, new_retval); - PEEK(1) = new_retval; + PEEK(1) = retval; break; } From 4a9e2b4b00ac742d8e0f3018430f19b60613cb7c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 28 Jan 2024 12:01:10 +0800 Subject: [PATCH 066/111] remove symbolic value, use symbolic type --- Python/abstract_interp_cases.c.h | 410 +++++++++--------- Python/optimizer_analysis.c | 185 ++++---- .../tier2_abstract_generator.py | 14 +- 3 files changed, 286 insertions(+), 323 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index fc1ff9659d1d0e..75b9e0350aaf80 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -19,18 +19,18 @@ /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ case _POP_TOP: { - _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymType *__value_; __value_ = stack_pointer[-1]; stack_pointer += -1; break; } case _END_SEND: { - _Py_UOpsSymbolicValue *__value_; - _Py_UOpsSymbolicValue *__receiver_; + _Py_UOpsSymType *__value_; + _Py_UOpsSymType *__receiver_; __value_ = stack_pointer[-1]; __receiver_ = stack_pointer[-2]; - __value_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __value_ = _Py_UOpsSymType_New(ctx, NULL); if (__value_ == NULL) { goto error; } stack_pointer[-2] = __value_; stack_pointer += -1; @@ -38,7 +38,7 @@ } case _UNARY_NEGATIVE: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -46,8 +46,8 @@ } case _UNARY_NOT: { - _Py_UOpsSymbolicValue *__value_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__value_; + _Py_UOpsSymType *__res_; __value_ = stack_pointer[-1]; // Constant evaluation if (is_const(__value_)) { @@ -56,14 +56,14 @@ value = get_const(__value_); assert(PyBool_Check(value)); 
res = Py_IsFalse(value) ? Py_True : Py_False; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 1; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -72,7 +72,7 @@ } case _TO_BOOL: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -80,7 +80,7 @@ } case _TO_BOOL_BOOL: { - _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymType *__value_; __value_ = stack_pointer[-1]; // Constant evaluation if (is_const(__value_)) { @@ -96,7 +96,7 @@ } case _TO_BOOL_INT: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -104,7 +104,7 @@ } case _TO_BOOL_LIST: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -112,7 +112,7 @@ } case _TO_BOOL_NONE: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -120,7 +120,7 @@ } case _TO_BOOL_STR: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -128,7 +128,7 @@ } case _TO_BOOL_ALWAYS_TRUE: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -136,7 +136,7 @@ } case _UNARY_INVERT: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == 
NULL) goto error; stack_pointer[-1] = __res_; @@ -144,8 +144,8 @@ } case _GUARD_BOTH_INT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -162,23 +162,23 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYLONG_TYPE, (uint32_t)0)) { + if (sym_matches_type((_Py_UOpsSymType *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYLONG_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__left_, PYLONG_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicValue *)__right_, PYLONG_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__left_, PYLONG_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__right_, PYLONG_TYPE, (uint32_t)0); } break; } case _BINARY_OP_MULTIPLY_INT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -192,14 +192,14 @@ res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -211,9 +211,9 @@ 
} case _BINARY_OP_ADD_INT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -227,14 +227,14 @@ res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -246,9 +246,9 @@ } case _BINARY_OP_SUBTRACT_INT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -262,14 +262,14 @@ res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -281,8 +281,8 @@ } case _GUARD_BOTH_FLOAT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = 
stack_pointer[-2]; // Constant evaluation @@ -299,23 +299,23 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYFLOAT_TYPE, (uint32_t)0)) { + if (sym_matches_type((_Py_UOpsSymType *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYFLOAT_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__left_, PYFLOAT_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicValue *)__right_, PYFLOAT_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__left_, PYFLOAT_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__right_, PYFLOAT_TYPE, (uint32_t)0); } break; } case _BINARY_OP_MULTIPLY_FLOAT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -330,14 +330,14 @@ ((PyFloatObject *)left)->ob_fval * ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -349,9 +349,9 @@ } case _BINARY_OP_ADD_FLOAT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; __right_ = 
stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -366,14 +366,14 @@ ((PyFloatObject *)left)->ob_fval + ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -385,9 +385,9 @@ } case _BINARY_OP_SUBTRACT_FLOAT: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -402,14 +402,14 @@ ((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -421,8 +421,8 @@ } case _GUARD_BOTH_UNICODE: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -439,23 +439,23 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__left_, PYUNICODE_TYPE, (uint32_t)0) && 
sym_matches_type((_Py_UOpsSymbolicValue *)__right_, PYUNICODE_TYPE, (uint32_t)0)) { + if (sym_matches_type((_Py_UOpsSymType *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYUNICODE_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__left_, PYUNICODE_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicValue *)__right_, PYUNICODE_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__left_, PYUNICODE_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__right_, PYUNICODE_TYPE, (uint32_t)0); } break; } case _BINARY_OP_ADD_UNICODE: { - _Py_UOpsSymbolicValue *__right_; - _Py_UOpsSymbolicValue *__left_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; // Constant evaluation @@ -469,14 +469,14 @@ res = PyUnicode_Concat(left, right); if (res == NULL) goto pop_2_error_tier_two; - __res_ = _Py_UOpsSymbolicValue_New(ctx, (PyObject *)res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } shrink_stack.oparg = 2; if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } new_inst.opcode = _NOP; } else { - __res_ = _Py_UOpsSymbolicValue_New(ctx, NULL); + __res_ = _Py_UOpsSymType_New(ctx, NULL); if (__res_ == NULL) { goto error; } } if (__res_ == NULL) goto error; @@ -488,7 +488,7 @@ } case _BINARY_SUBSCR: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -497,7 +497,7 @@ } case _BINARY_SLICE: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-3] = __res_; @@ -511,7 +511,7 @@ } case _BINARY_SUBSCR_LIST_INT: { - _Py_UOpsSymbolicValue 
*__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -520,7 +520,7 @@ } case _BINARY_SUBSCR_STR_INT: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -529,7 +529,7 @@ } case _BINARY_SUBSCR_TUPLE_INT: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -538,7 +538,7 @@ } case _BINARY_SUBSCR_DICT: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -579,7 +579,7 @@ } case _CALL_INTRINSIC_1: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -587,7 +587,7 @@ } case _CALL_INTRINSIC_2: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -600,7 +600,7 @@ /* _INSTRUMENTED_RETURN_CONST is not a viable micro-op for tier 2 */ case _GET_AITER: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -608,7 +608,7 @@ } case _GET_ANEXT: { - _Py_UOpsSymbolicValue *__awaitable_; + _Py_UOpsSymType *__awaitable_; __awaitable_ = sym_init_unknown(ctx); if(__awaitable_ == NULL) goto error; stack_pointer[0] = __awaitable_; @@ -617,7 +617,7 @@ } case _GET_AWAITABLE: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -636,7 +636,7 @@ } case _LOAD_ASSERTION_ERROR: { - _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymType *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto 
error; stack_pointer[0] = __value_; @@ -645,7 +645,7 @@ } case _LOAD_BUILD_CLASS: { - _Py_UOpsSymbolicValue *__bc_; + _Py_UOpsSymType *__bc_; __bc_ = sym_init_unknown(ctx); if(__bc_ == NULL) goto error; stack_pointer[0] = __bc_; @@ -731,7 +731,7 @@ } case _LOAD_LOCALS: { - _Py_UOpsSymbolicValue *__locals_; + _Py_UOpsSymType *__locals_; __locals_ = sym_init_unknown(ctx); if(__locals_ == NULL) goto error; stack_pointer[0] = __locals_; @@ -740,7 +740,7 @@ } case _LOAD_FROM_DICT_OR_GLOBALS: { - _Py_UOpsSymbolicValue *__v_; + _Py_UOpsSymType *__v_; __v_ = sym_init_unknown(ctx); if(__v_ == NULL) goto error; stack_pointer[-1] = __v_; @@ -748,7 +748,7 @@ } case _LOAD_NAME: { - _Py_UOpsSymbolicValue *__v_; + _Py_UOpsSymType *__v_; __v_ = sym_init_unknown(ctx); if(__v_ == NULL) goto error; stack_pointer[0] = __v_; @@ -757,8 +757,8 @@ } case _LOAD_GLOBAL: { - _Py_UOpsSymbolicValue *__res_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__res_; + _Py_UOpsSymType *__null_ = NULL; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -779,8 +779,8 @@ } case _LOAD_GLOBAL_MODULE: { - _Py_UOpsSymbolicValue *__res_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__res_; + _Py_UOpsSymType *__null_ = NULL; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -793,8 +793,8 @@ } case _LOAD_GLOBAL_BUILTINS: { - _Py_UOpsSymbolicValue *__res_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__res_; + _Py_UOpsSymType *__null_ = NULL; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -819,7 +819,7 @@ } case _LOAD_FROM_DICT_OR_DEREF: { - _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymType *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[-1] = __value_; @@ -827,7 +827,7 @@ } case _LOAD_DEREF: { - _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymType *__value_; __value_ = 
sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[0] = __value_; @@ -845,7 +845,7 @@ } case _BUILD_STRING: { - _Py_UOpsSymbolicValue *__str_; + _Py_UOpsSymType *__str_; __str_ = sym_init_unknown(ctx); if(__str_ == NULL) goto error; stack_pointer[-oparg] = __str_; @@ -854,7 +854,7 @@ } case _BUILD_TUPLE: { - _Py_UOpsSymbolicValue *__tup_; + _Py_UOpsSymType *__tup_; __tup_ = sym_init_unknown(ctx); if(__tup_ == NULL) goto error; stack_pointer[-oparg] = __tup_; @@ -863,7 +863,7 @@ } case _BUILD_LIST: { - _Py_UOpsSymbolicValue *__list_; + _Py_UOpsSymType *__list_; __list_ = sym_init_unknown(ctx); if(__list_ == NULL) goto error; stack_pointer[-oparg] = __list_; @@ -882,7 +882,7 @@ } case _BUILD_SET: { - _Py_UOpsSymbolicValue *__set_; + _Py_UOpsSymType *__set_; __set_ = sym_init_unknown(ctx); if(__set_ == NULL) goto error; stack_pointer[-oparg] = __set_; @@ -891,7 +891,7 @@ } case _BUILD_MAP: { - _Py_UOpsSymbolicValue *__map_; + _Py_UOpsSymType *__map_; __map_ = sym_init_unknown(ctx); if(__map_ == NULL) goto error; stack_pointer[-oparg*2] = __map_; @@ -904,7 +904,7 @@ } case _BUILD_CONST_KEY_MAP: { - _Py_UOpsSymbolicValue *__map_; + _Py_UOpsSymType *__map_; __map_ = sym_init_unknown(ctx); if(__map_ == NULL) goto error; stack_pointer[-1 - oparg] = __map_; @@ -930,7 +930,7 @@ /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ case _LOAD_SUPER_ATTR_ATTR: { - _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymType *__attr_; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; stack_pointer[-3] = __attr_; @@ -939,8 +939,8 @@ } case _LOAD_SUPER_ATTR_METHOD: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__self_or_null_; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_or_null_; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_or_null_ = sym_init_unknown(ctx); @@ -952,8 +952,8 @@ } case _LOAD_ATTR: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__self_or_null_ = NULL; + 
_Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_or_null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_or_null_ = sym_init_unknown(ctx); @@ -966,7 +966,7 @@ } case _GUARD_TYPE_VERSION: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -982,14 +982,14 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)) { + if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version); + sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version); } break; } @@ -999,8 +999,8 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1013,7 +1013,7 @@ } case _CHECK_ATTR_MODULE: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -1033,8 +1033,8 @@ } case _LOAD_ATTR_MODULE: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1047,7 +1047,7 @@ } case _CHECK_ATTR_WITH_HINT: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1067,8 +1067,8 @@ 
} case _LOAD_ATTR_WITH_HINT: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1081,8 +1081,8 @@ } case _LOAD_ATTR_SLOT: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1095,7 +1095,7 @@ } case _CHECK_ATTR_CLASS: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -1114,8 +1114,8 @@ } case _LOAD_ATTR_CLASS: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__null_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __null_ = sym_init_unknown(ctx); @@ -1132,7 +1132,7 @@ /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */ case _GUARD_DORV_VALUES: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1147,14 +1147,14 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)) { + if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0); } break; } @@ -1172,7 +1172,7 @@ } case _COMPARE_OP: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = 
sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1181,7 +1181,7 @@ } case _COMPARE_OP_FLOAT: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1190,7 +1190,7 @@ } case _COMPARE_OP_INT: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1199,7 +1199,7 @@ } case _COMPARE_OP_STR: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1208,7 +1208,7 @@ } case _IS_OP: { - _Py_UOpsSymbolicValue *__b_; + _Py_UOpsSymType *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-2] = __b_; @@ -1217,7 +1217,7 @@ } case _CONTAINS_OP: { - _Py_UOpsSymbolicValue *__b_; + _Py_UOpsSymType *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-2] = __b_; @@ -1226,8 +1226,8 @@ } case _CHECK_EG_MATCH: { - _Py_UOpsSymbolicValue *__rest_; - _Py_UOpsSymbolicValue *__match_; + _Py_UOpsSymType *__rest_; + _Py_UOpsSymType *__match_; __rest_ = sym_init_unknown(ctx); if(__rest_ == NULL) goto error; __match_ = sym_init_unknown(ctx); @@ -1238,7 +1238,7 @@ } case _CHECK_EXC_MATCH: { - _Py_UOpsSymbolicValue *__b_; + _Py_UOpsSymType *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-1] = __b_; @@ -1252,7 +1252,7 @@ /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ case _IS_NONE: { - _Py_UOpsSymbolicValue *__b_; + _Py_UOpsSymType *__b_; __b_ = sym_init_unknown(ctx); if(__b_ == NULL) goto error; stack_pointer[-1] = __b_; @@ -1260,7 +1260,7 @@ } case _GET_LEN: { - _Py_UOpsSymbolicValue *__len_o_; + _Py_UOpsSymType *__len_o_; __len_o_ = sym_init_unknown(ctx); if(__len_o_ == NULL) goto error; stack_pointer[0] = __len_o_; @@ -1269,7 +1269,7 @@ } case _MATCH_CLASS: { - 
_Py_UOpsSymbolicValue *__attrs_; + _Py_UOpsSymType *__attrs_; __attrs_ = sym_init_unknown(ctx); if(__attrs_ == NULL) goto error; stack_pointer[-3] = __attrs_; @@ -1278,7 +1278,7 @@ } case _MATCH_MAPPING: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[0] = __res_; @@ -1287,7 +1287,7 @@ } case _MATCH_SEQUENCE: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[0] = __res_; @@ -1296,7 +1296,7 @@ } case _MATCH_KEYS: { - _Py_UOpsSymbolicValue *__values_or_none_; + _Py_UOpsSymType *__values_or_none_; __values_or_none_ = sym_init_unknown(ctx); if(__values_or_none_ == NULL) goto error; stack_pointer[0] = __values_or_none_; @@ -1305,7 +1305,7 @@ } case _GET_ITER: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -1313,7 +1313,7 @@ } case _GET_YIELD_FROM_ITER: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = sym_init_unknown(ctx); if(__iter_ == NULL) goto error; stack_pointer[-1] = __iter_; @@ -1323,7 +1323,7 @@ /* _FOR_ITER is not a viable micro-op for tier 2 */ case _FOR_ITER_TIER_TWO: { - _Py_UOpsSymbolicValue *__next_; + _Py_UOpsSymType *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; stack_pointer[0] = __next_; @@ -1334,7 +1334,7 @@ /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 */ case _ITER_CHECK_LIST: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1352,7 +1352,7 @@ /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 */ case _GUARD_NOT_EXHAUSTED_LIST: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1372,7 +1372,7 @@ } case 
_ITER_NEXT_LIST: { - _Py_UOpsSymbolicValue *__next_; + _Py_UOpsSymType *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; stack_pointer[0] = __next_; @@ -1381,7 +1381,7 @@ } case _ITER_CHECK_TUPLE: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1399,7 +1399,7 @@ /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 */ case _GUARD_NOT_EXHAUSTED_TUPLE: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1419,7 +1419,7 @@ } case _ITER_NEXT_TUPLE: { - _Py_UOpsSymbolicValue *__next_; + _Py_UOpsSymType *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; stack_pointer[0] = __next_; @@ -1428,7 +1428,7 @@ } case _ITER_CHECK_RANGE: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1447,7 +1447,7 @@ /* _ITER_JUMP_RANGE is not a viable micro-op for tier 2 */ case _GUARD_NOT_EXHAUSTED_RANGE: { - _Py_UOpsSymbolicValue *__iter_; + _Py_UOpsSymType *__iter_; __iter_ = stack_pointer[-1]; // Constant evaluation if (is_const(__iter_)) { @@ -1465,7 +1465,7 @@ } case _ITER_NEXT_RANGE: { - _Py_UOpsSymbolicValue *__next_; + _Py_UOpsSymType *__next_; __next_ = sym_init_unknown(ctx); if(__next_ == NULL) goto error; sym_set_type(__next_, PYLONG_TYPE, 0); @@ -1477,8 +1477,8 @@ /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ case _BEFORE_ASYNC_WITH: { - _Py_UOpsSymbolicValue *__exit_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__exit_; + _Py_UOpsSymType *__res_; __exit_ = sym_init_unknown(ctx); if(__exit_ == NULL) goto error; __res_ = sym_init_unknown(ctx); @@ -1490,8 +1490,8 @@ } case _BEFORE_WITH: { - _Py_UOpsSymbolicValue *__exit_; - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__exit_; + _Py_UOpsSymType *__res_; __exit_ = 
sym_init_unknown(ctx); if(__exit_ == NULL) goto error; __res_ = sym_init_unknown(ctx); @@ -1503,7 +1503,7 @@ } case _WITH_EXCEPT_START: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[0] = __res_; @@ -1512,8 +1512,8 @@ } case _PUSH_EXC_INFO: { - _Py_UOpsSymbolicValue *__prev_exc_; - _Py_UOpsSymbolicValue *__new_exc_; + _Py_UOpsSymType *__prev_exc_; + _Py_UOpsSymType *__new_exc_; __prev_exc_ = sym_init_unknown(ctx); if(__prev_exc_ == NULL) goto error; __new_exc_ = sym_init_unknown(ctx); @@ -1525,7 +1525,7 @@ } case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1540,20 +1540,20 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)) { + if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0); } break; } case _GUARD_KEYS_VERSION: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation @@ -1569,21 +1569,21 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)) { + if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation 
- sym_set_type((_Py_UOpsSymbolicValue *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version); + sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version); } break; } case _LOAD_ATTR_METHOD_WITH_VALUES: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__self_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1595,8 +1595,8 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__self_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1608,7 +1608,7 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymType *__attr_; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; stack_pointer[-1] = __attr_; @@ -1617,7 +1617,7 @@ } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - _Py_UOpsSymbolicValue *__attr_; + _Py_UOpsSymType *__attr_; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; stack_pointer[-1] = __attr_; @@ -1626,7 +1626,7 @@ } case _CHECK_ATTR_METHOD_LAZY_DICT: { - _Py_UOpsSymbolicValue *__owner_; + _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; // Constant evaluation if (is_const(__owner_)) { @@ -1646,8 +1646,8 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { - _Py_UOpsSymbolicValue *__attr_; - _Py_UOpsSymbolicValue *__self_ = NULL; + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_ = NULL; __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1663,8 +1663,8 @@ /* _CALL is not a viable micro-op for tier 2 */ case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UOpsSymbolicValue *__null_; - _Py_UOpsSymbolicValue *__callable_; + _Py_UOpsSymType *__null_; + _Py_UOpsSymType *__callable_; __null_ = stack_pointer[-1 - 
oparg]; __callable_ = stack_pointer[-2 - oparg]; // Constant evaluation @@ -1681,22 +1681,22 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymbolicValue *)__null_, NULL_TYPE, (uint32_t)0)) { + if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__null_, NULL_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__callable_, PYMETHOD_TYPE, (uint32_t)0); - sym_set_type((_Py_UOpsSymbolicValue *)__null_, NULL_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__callable_, PYMETHOD_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__null_, NULL_TYPE, (uint32_t)0); } break; } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UOpsSymbolicValue *__func_; - _Py_UOpsSymbolicValue *__self_; + _Py_UOpsSymType *__func_; + _Py_UOpsSymType *__self_; __func_ = sym_init_unknown(ctx); if(__func_ == NULL) goto error; __self_ = sym_init_unknown(ctx); @@ -1711,8 +1711,8 @@ } case _CHECK_FUNCTION_EXACT_ARGS: { - _Py_UOpsSymbolicValue *__self_or_null_; - _Py_UOpsSymbolicValue *__callable_; + _Py_UOpsSymType *__self_or_null_; + _Py_UOpsSymType *__callable_; __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)CURRENT_OPERAND(); @@ -1733,14 +1733,14 @@ break; } // Type guard elimination - if (sym_matches_type((_Py_UOpsSymbolicValue *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)) { + if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)) { DPRINTF(2, "type propagation eliminated guard\n"); new_inst.opcode = _NOP; break; } else { // Type propagation - sym_set_type((_Py_UOpsSymbolicValue *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version); 
+ sym_set_type((_Py_UOpsSymType *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version); } break; } @@ -1752,7 +1752,7 @@ /* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 */ case _CALL_TYPE_1: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1761,7 +1761,7 @@ } case _CALL_STR_1: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1770,7 +1770,7 @@ } case _CALL_TUPLE_1: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1786,7 +1786,7 @@ } case _CALL_BUILTIN_CLASS: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1795,7 +1795,7 @@ } case _CALL_BUILTIN_O: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1804,7 +1804,7 @@ } case _CALL_BUILTIN_FAST: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1813,7 +1813,7 @@ } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1822,7 +1822,7 @@ } case _CALL_LEN: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1831,7 +1831,7 @@ } case _CALL_ISINSTANCE: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == 
NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1840,7 +1840,7 @@ } case _CALL_METHOD_DESCRIPTOR_O: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1849,7 +1849,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1858,7 +1858,7 @@ } case _CALL_METHOD_DESCRIPTOR_NOARGS: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1867,7 +1867,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2 - oparg] = __res_; @@ -1884,7 +1884,7 @@ /* _CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ case _MAKE_FUNCTION: { - _Py_UOpsSymbolicValue *__func_; + _Py_UOpsSymType *__func_; __func_ = sym_init_unknown(ctx); if(__func_ == NULL) goto error; stack_pointer[-1] = __func_; @@ -1892,7 +1892,7 @@ } case _SET_FUNCTION_ATTRIBUTE: { - _Py_UOpsSymbolicValue *__func_; + _Py_UOpsSymType *__func_; __func_ = sym_init_unknown(ctx); if(__func_ == NULL) goto error; stack_pointer[-2] = __func_; @@ -1901,7 +1901,7 @@ } case _BUILD_SLICE: { - _Py_UOpsSymbolicValue *__slice_; + _Py_UOpsSymType *__slice_; __slice_ = sym_init_unknown(ctx); if(__slice_ == NULL) goto error; stack_pointer[-2 - ((oparg == 3) ? 
1 : 0)] = __slice_; @@ -1910,7 +1910,7 @@ } case _CONVERT_VALUE: { - _Py_UOpsSymbolicValue *__result_; + _Py_UOpsSymType *__result_; __result_ = sym_init_unknown(ctx); if(__result_ == NULL) goto error; stack_pointer[-1] = __result_; @@ -1918,7 +1918,7 @@ } case _FORMAT_SIMPLE: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-1] = __res_; @@ -1926,7 +1926,7 @@ } case _FORMAT_WITH_SPEC: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1935,7 +1935,7 @@ } case _BINARY_OP: { - _Py_UOpsSymbolicValue *__res_; + _Py_UOpsSymType *__res_; __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; stack_pointer[-2] = __res_; @@ -1944,8 +1944,8 @@ } case _SWAP: { - _Py_UOpsSymbolicValue *__top_; - _Py_UOpsSymbolicValue *__bottom_; + _Py_UOpsSymType *__top_; + _Py_UOpsSymType *__bottom_; __top_ = stack_pointer[-1]; __bottom_ = stack_pointer[-2 - (oparg-2)]; stack_pointer[-2 - (oparg-2)] = __top_; @@ -2004,7 +2004,7 @@ } case _LOAD_CONST_INLINE: { - _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymType *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[0] = __value_; @@ -2013,7 +2013,7 @@ } case _LOAD_CONST_INLINE_BORROW: { - _Py_UOpsSymbolicValue *__value_; + _Py_UOpsSymType *__value_; __value_ = sym_init_unknown(ctx); if(__value_ == NULL) goto error; stack_pointer[0] = __value_; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index dcb5f72662fe4d..6e39d64cd9fbad 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -101,14 +101,6 @@ typedef struct { } _Py_UOpsSymType; -typedef struct _Py_UOpsSymbolicValue { - // Value numbering but only for types and constant values. 
- // https://en.wikipedia.org/wiki/Value_numbering - _Py_UOpsSymType *ty_number; - // More fields can be added later if we want to support - // more optimizations. -} _Py_UOpsSymbolicValue; - typedef struct _Py_UOpsAbstractFrame { PyObject_HEAD // Strong reference. @@ -117,14 +109,14 @@ typedef struct _Py_UOpsAbstractFrame { struct _Py_UOpsAbstractFrame *next; // Symbolic version of co_consts int sym_consts_len; - _Py_UOpsSymbolicValue **sym_consts; + _Py_UOpsSymType **sym_consts; // Max stacklen int stack_len; int locals_len; - _Py_UOpsSymbolicValue **stack_pointer; - _Py_UOpsSymbolicValue **stack; - _Py_UOpsSymbolicValue **locals; + _Py_UOpsSymType **stack_pointer; + _Py_UOpsSymType **stack; + _Py_UOpsSymType **locals; } _Py_UOpsAbstractFrame; static void @@ -145,12 +137,6 @@ PyTypeObject _Py_UOpsAbstractFrame_Type = { .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; -typedef struct sym_arena { - char *curr_available; - char *end; - char *arena; -} sym_arena; - typedef struct ty_arena { int ty_curr_number; int ty_max_number; @@ -158,7 +144,7 @@ typedef struct ty_arena { } ty_arena; typedef struct frequent_syms { - _Py_UOpsSymbolicValue *push_nulL_sym; + _Py_UOpsSymType *push_nulL_sym; } frequent_syms; typedef struct uops_emitter { @@ -173,20 +159,16 @@ typedef struct _Py_UOpsAbstractInterpContext { // The current "executing" frame. _Py_UOpsAbstractFrame *frame; - // Arena for the symbolic expression themselves. - sym_arena s_arena; - // Arena for the symbolic expressions' types. - // This is separate from the s_arena so that we can free - // all the constants easily. + // Arena for the symbolic types. 
ty_arena t_arena; frequent_syms frequent_syms; uops_emitter emitter; - _Py_UOpsSymbolicValue **water_level; - _Py_UOpsSymbolicValue **limit; - _Py_UOpsSymbolicValue *localsplus[1]; + _Py_UOpsSymType **water_level; + _Py_UOpsSymType **limit; + _Py_UOpsSymType *localsplus[1]; } _Py_UOpsAbstractInterpContext; static void @@ -194,22 +176,21 @@ abstractinterp_dealloc(PyObject *o) { _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; Py_XDECREF(self->frame); - if (self->s_arena.arena != NULL) { + if (self->t_arena.arena != NULL) { int tys = self->t_arena.ty_curr_number; for (int i = 0; i < tys; i++) { Py_CLEAR(self->t_arena.arena[i].const_val); } } PyMem_Free(self->t_arena.arena); - PyMem_Free(self->s_arena.arena); Py_TYPE(self)->tp_free((PyObject *)self); } PyTypeObject _Py_UOpsAbstractInterpContext_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's context", - .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext) - sizeof(_Py_UOpsSymbolicValue *), - .tp_itemsize = sizeof(_Py_UOpsSymbolicValue *), + .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext) - sizeof(_Py_UOpsSymType *), + .tp_itemsize = sizeof(_Py_UOpsSymType *), .tp_dealloc = (destructor)abstractinterp_dealloc, .tp_free = PyObject_Free, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION @@ -222,7 +203,7 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, static inline int frame_push(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, - _Py_UOpsSymbolicValue **localsplus_start, + _Py_UOpsSymType **localsplus_start, int locals_len, int curr_stacklen, int total_len); @@ -243,21 +224,15 @@ abstractinterp_context_new(PyCodeObject *co, _Py_UOpsAbstractInterpContext *self = NULL; char *arena = NULL; _Py_UOpsSymType *t_arena = NULL; - Py_ssize_t arena_size = (sizeof(_Py_UOpsSymbolicValue)) * ir_entries * OVERALLOCATE_FACTOR; Py_ssize_t ty_arena_size = (sizeof(_Py_UOpsSymType)) * ir_entries * OVERALLOCATE_FACTOR; - - arena = (char 
*)PyMem_Malloc(arena_size); - if (arena == NULL) { - goto error; - } - t_arena = (_Py_UOpsSymType *)PyMem_Malloc(ty_arena_size); if (t_arena == NULL) { goto error; } + self = PyObject_NewVar(_Py_UOpsAbstractInterpContext, &_Py_UOpsAbstractInterpContext_Type, MAX_ABSTRACT_INTERP_SIZE); @@ -273,10 +248,6 @@ abstractinterp_context_new(PyCodeObject *co, // Setup the arena for sym expressions. - self->s_arena.arena = arena; - self->s_arena.curr_available = arena; - assert(arena_size > 0); - self->s_arena.end = arena + arena_size; self->t_arena.ty_curr_number = 0; self->t_arena.arena = t_arena; self->t_arena.ty_max_number = ir_entries * OVERALLOCATE_FACTOR; @@ -313,7 +284,6 @@ abstractinterp_context_new(PyCodeObject *co, if (self != NULL) { // Important so we don't double free them. self->t_arena.arena = NULL; - self->s_arena.arena = NULL; self->frame = NULL; } Py_XDECREF(self); @@ -321,19 +291,19 @@ abstractinterp_context_new(PyCodeObject *co, return NULL; } -static inline _Py_UOpsSymbolicValue* +static inline _Py_UOpsSymType* sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx); -static inline _Py_UOpsSymbolicValue ** +static inline _Py_UOpsSymType ** create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) { Py_ssize_t co_const_len = PyTuple_GET_SIZE(co_consts); - _Py_UOpsSymbolicValue **sym_consts = PyMem_New(_Py_UOpsSymbolicValue *, co_const_len); + _Py_UOpsSymType **sym_consts = PyMem_New(_Py_UOpsSymType *, co_const_len); if (sym_consts == NULL) { return NULL; } for (Py_ssize_t i = 0; i < co_const_len; i++) { - _Py_UOpsSymbolicValue *res = sym_init_const(ctx, Py_NewRef(PyTuple_GET_ITEM(co_consts, i)), (int)i); + _Py_UOpsSymType *res = sym_init_const(ctx, Py_NewRef(PyTuple_GET_ITEM(co_consts, i)), (int)i); if (res == NULL) { goto error; } @@ -347,11 +317,11 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) } -static inline _Py_UOpsSymbolicValue* +static inline _Py_UOpsSymType* 
sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx); static void -sym_copy_immutable_type_info(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicValue *to_sym); +sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym); /* * The reason why we have a separate frame_push and frame_initialize is to mimic @@ -360,7 +330,7 @@ sym_copy_immutable_type_info(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicVa static inline int frame_push(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, - _Py_UOpsSymbolicValue **localsplus_start, + _Py_UOpsSymType **localsplus_start, int locals_len, int curr_stacklen, int total_len) @@ -381,7 +351,7 @@ frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame { // Initialize with the initial state of all local variables for (int i = 0; i < locals_len; i++) { - _Py_UOpsSymbolicValue *local = sym_init_unknown(ctx); + _Py_UOpsSymType *local = sym_init_unknown(ctx); if (local == NULL) { goto error; } @@ -391,7 +361,7 @@ frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame // Initialize the stack as well for (int i = 0; i < curr_stacklen; i++) { - _Py_UOpsSymbolicValue *stackvar = sym_init_unknown(ctx); + _Py_UOpsSymType *stackvar = sym_init_unknown(ctx); if (stackvar == NULL) { goto error; } @@ -409,7 +379,7 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts, int stack_len, int locals_len, int curr_stacklen) { - _Py_UOpsSymbolicValue **sym_consts = create_sym_consts(ctx, co_consts); + _Py_UOpsSymType **sym_consts = create_sym_consts(ctx, co_consts); if (sym_consts == NULL) { return NULL; } @@ -432,12 +402,12 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, } static inline bool -sym_is_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ); +sym_is_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ); static inline uint64_t -sym_type_get_refinement(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ); 
+sym_type_get_refinement(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ); static inline PyFunctionObject * -extract_func_from_sym(_Py_UOpsSymbolicValue *callable_sym) +extract_func_from_sym(_Py_UOpsSymType *callable_sym) { assert(callable_sym != NULL); if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { @@ -460,7 +430,7 @@ static int ctx_frame_push( _Py_UOpsAbstractInterpContext *ctx, PyCodeObject *co, - _Py_UOpsSymbolicValue **localsplus_start + _Py_UOpsSymType **localsplus_start ) { _Py_UOpsAbstractFrame *frame = frame_new(ctx, @@ -503,33 +473,26 @@ ctx_frame_pop( } static void -sym_set_type_from_const(_Py_UOpsSymbolicValue *sym, PyObject *obj); +sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj); // Steals a reference to const_val -static _Py_UOpsSymbolicValue* -_Py_UOpsSymbolicValue_New(_Py_UOpsAbstractInterpContext *ctx, +static _Py_UOpsSymType* +_Py_UOpsSymType_New(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val) { - _Py_UOpsSymbolicValue *self = (_Py_UOpsSymbolicValue *)ctx->s_arena.curr_available; - ctx->s_arena.curr_available += sizeof(_Py_UOpsSymbolicValue) + sizeof(_Py_UOpsSymbolicValue *); - if (ctx->s_arena.curr_available >= ctx->s_arena.end) { - OPT_STAT_INC(optimizer_failure_reason_no_memory); - DPRINTF(1, "out of space for symbolic expression\n"); - return NULL; - } - - _Py_UOpsSymType *ty = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number]; + _Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number]; if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) { OPT_STAT_INC(optimizer_failure_reason_no_memory); DPRINTF(1, "out of space for symbolic expression type\n"); return NULL; } ctx->t_arena.ty_curr_number++; - ty->const_val = NULL; - ty->types = 0; + self->const_val = NULL; + self->types = 0; + + self->const_val = NULL; + self->types = 0; - self->ty_number = ty; - self->ty_number->types = 0; if (const_val != NULL) { sym_set_type_from_const(self, const_val); } @@ -539,30 +502,30 @@ 
_Py_UOpsSymbolicValue_New(_Py_UOpsAbstractInterpContext *ctx, static void -sym_set_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) +sym_set_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) { - sym->ty_number->types |= 1 << typ; + sym->types |= 1 << typ; if (typ <= MAX_TYPE_WITH_REFINEMENT) { - sym->ty_number->refinement[typ] = refinement; + sym->refinement[typ] = refinement; } } // Note: for this, to_sym MUST point to brand new sym. static void -sym_copy_immutable_type_info(_Py_UOpsSymbolicValue *from_sym, _Py_UOpsSymbolicValue *to_sym) +sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym) { - to_sym->ty_number->types = (from_sym->ty_number->types & IMMUTABLES); - if (to_sym->ty_number->types) { - to_sym->ty_number->const_val = Py_XNewRef(from_sym->ty_number->const_val); + to_sym->types = (from_sym->types & IMMUTABLES); + if (to_sym->types) { + to_sym->const_val = Py_XNewRef(from_sym->const_val); } } // Steals a reference to obj static void -sym_set_type_from_const(_Py_UOpsSymbolicValue *sym, PyObject *obj) +sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) { PyTypeObject *tp = Py_TYPE(obj); - sym->ty_number->const_val = obj; + sym->const_val = obj; if (tp == &PyLong_Type) { sym_set_type(sym, PYLONG_TYPE, 0); @@ -597,18 +560,18 @@ sym_set_type_from_const(_Py_UOpsSymbolicValue *sym, PyObject *obj) } -static inline _Py_UOpsSymbolicValue* +static inline _Py_UOpsSymType* sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx) { - return _Py_UOpsSymbolicValue_New(ctx,NULL); + return _Py_UOpsSymType_New(ctx,NULL); } // Steals a reference to const_val -static inline _Py_UOpsSymbolicValue* +static inline _Py_UOpsSymType* sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx) { assert(const_val != NULL); - _Py_UOpsSymbolicValue *temp = _Py_UOpsSymbolicValue_New( + _Py_UOpsSymType *temp = _Py_UOpsSymType_New( ctx, const_val ); @@ -619,13 +582,13 
@@ sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int cons return temp; } -static _Py_UOpsSymbolicValue* +static _Py_UOpsSymType* sym_init_push_null(_Py_UOpsAbstractInterpContext *ctx) { if (ctx->frequent_syms.push_nulL_sym != NULL) { return ctx->frequent_syms.push_nulL_sym; } - _Py_UOpsSymbolicValue *null_sym = sym_init_unknown(ctx); + _Py_UOpsSymType *null_sym = sym_init_unknown(ctx); if (null_sym == NULL) { return NULL; } @@ -635,32 +598,32 @@ sym_init_push_null(_Py_UOpsAbstractInterpContext *ctx) } static inline bool -sym_is_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ) +sym_is_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ) { - if ((sym->ty_number->types & (1 << typ)) == 0) { + if ((sym->types & (1 << typ)) == 0) { return false; } return true; } static inline bool -sym_matches_type(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) +sym_matches_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) { if (!sym_is_type(sym, typ)) { return false; } if (typ <= MAX_TYPE_WITH_REFINEMENT) { - return sym->ty_number->refinement[typ] == refinement; + return sym->refinement[typ] == refinement; } return true; } static uint64_t -sym_type_get_refinement(_Py_UOpsSymbolicValue *sym, _Py_UOpsSymExprTypeEnum typ) +sym_type_get_refinement(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ) { assert(sym_is_type(sym, typ)); assert(typ <= MAX_TYPE_WITH_REFINEMENT); - return sym->ty_number->refinement[typ]; + return sym->refinement[typ]; } @@ -697,15 +660,15 @@ op_is_specially_handled(uint32_t opcode) } static inline bool -is_const(_Py_UOpsSymbolicValue *expr) +is_const(_Py_UOpsSymType *expr) { - return expr->ty_number->const_val != NULL; + return expr->const_val != NULL; } static inline PyObject * -get_const(_Py_UOpsSymbolicValue *expr) +get_const(_Py_UOpsSymType *expr) { - return expr->ty_number->const_val; + return expr->const_val; } @@ -713,7 +676,7 @@ static int 
clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { int locals_entries = ctx->frame->locals_len; for (int i = 0; i < locals_entries; i++) { - _Py_UOpsSymbolicValue *new_local = sym_init_unknown(ctx); + _Py_UOpsSymType *new_local = sym_init_unknown(ctx); if (new_local == NULL) { return -1; } @@ -791,7 +754,7 @@ do { \ #ifndef Py_DEBUG #define GETITEM(ctx, i) (ctx->frame->sym_consts[(i)]) #else -static inline _Py_UOpsSymbolicValue * +static inline _Py_UOpsSymType * GETITEM(_Py_UOpsAbstractInterpContext *ctx, Py_ssize_t i) { assert(i < ctx->frame->sym_consts_len); return ctx->frame->sym_consts[i]; @@ -840,7 +803,7 @@ uop_abstract_interpret_single_inst( uint32_t opcode = inst->opcode; uint64_t operand = inst->operand; - _Py_UOpsSymbolicValue **stack_pointer = ctx->frame->stack_pointer; + _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer; _PyUOpInstruction new_inst = *inst; _PyUOpInstruction shrink_stack = {_SHRINK_STACK, 0, 0, 0}; @@ -853,7 +816,7 @@ uop_abstract_interpret_single_inst( // Note: LOAD_FAST_CHECK is not pure!!! case LOAD_FAST_CHECK: { STACK_GROW(1); - _Py_UOpsSymbolicValue *local = GETLOCAL(oparg); + _Py_UOpsSymType *local = GETLOCAL(oparg); // We guarantee this will error - just bail and don't optimize it. if (sym_is_type(local, NULL_TYPE)) { goto error; @@ -863,7 +826,7 @@ uop_abstract_interpret_single_inst( } case LOAD_FAST: { STACK_GROW(1); - _Py_UOpsSymbolicValue * local = GETLOCAL(oparg); + _Py_UOpsSymType * local = GETLOCAL(oparg); if (sym_is_type(local, NULL_TYPE)) { Py_UNREACHABLE(); } @@ -880,7 +843,7 @@ uop_abstract_interpret_single_inst( } case LOAD_CONST: { STACK_GROW(1); - PEEK(1) = (_Py_UOpsSymbolicValue *)GETITEM( + PEEK(1) = (_Py_UOpsSymType *)GETITEM( ctx, oparg); assert(is_const(PEEK(1))); // Peephole: inline constants. 
@@ -894,13 +857,13 @@ uop_abstract_interpret_single_inst( } case STORE_FAST_MAYBE_NULL: case STORE_FAST: { - _Py_UOpsSymbolicValue *value = PEEK(1); + _Py_UOpsSymType *value = PEEK(1); GETLOCAL(oparg) = value; STACK_SHRINK(1); break; } case COPY: { - _Py_UOpsSymbolicValue *bottom = PEEK(1 + (oparg - 1)); + _Py_UOpsSymType *bottom = PEEK(1 + (oparg - 1)); STACK_GROW(1); PEEK(1) = bottom; break; @@ -908,7 +871,7 @@ uop_abstract_interpret_single_inst( case PUSH_NULL: { STACK_GROW(1); - _Py_UOpsSymbolicValue *null_sym = sym_init_push_null(ctx); + _Py_UOpsSymType *null_sym = sym_init_push_null(ctx); if (null_sym == NULL) { goto error; } @@ -934,9 +897,9 @@ uop_abstract_interpret_single_inst( } PyCodeObject *co = (PyCodeObject *)func->func_code; - _Py_UOpsSymbolicValue *self_or_null = PEEK(0); + _Py_UOpsSymType *self_or_null = PEEK(0); assert(self_or_null != NULL); - _Py_UOpsSymbolicValue **args = &PEEK(-1); + _Py_UOpsSymType **args = &PEEK(-1); assert(args != NULL); // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS if (!sym_is_type(self_or_null, NULL_TYPE) && @@ -962,7 +925,7 @@ uop_abstract_interpret_single_inst( case _POP_FRAME: { assert(STACK_LEVEL() == 1); - _Py_UOpsSymbolicValue *retval = PEEK(1); + _Py_UOpsSymType *retval = PEEK(1); STACK_SHRINK(1); ctx->frame->stack_pointer = stack_pointer; diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index d51cb5df4addde..33f0025d7ae277 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -51,7 +51,7 @@ def declare_variables( uop: Uop, out: CWriter, - default_type: str = "_Py_UOpsSymbolicValue *", + default_type: str = "_Py_UOpsSymType *", skip_inputs: bool = False, skip_peeks: bool = False, ) -> None: @@ -66,7 +66,7 @@ def declare_variables( if var.name not in variables: type = default_type if var.size != "1" and var.type == "PyObject **": - type = "_Py_UOpsSymbolicValue **" + type 
= "_Py_UOpsSymType **" variables.add(var.name) if var.condition: out.emit(f"{type}{var.name} = NULL;\n") @@ -81,7 +81,7 @@ def declare_variables( variables.add(var.name) type = default_type if var.size != "1" and var.type == "PyObject **": - type = "_Py_UOpsSymbolicValue **" + type = "_Py_UOpsSymType **" if var.condition: out.emit(f"{type}{var.name} = NULL;\n") else: @@ -194,7 +194,7 @@ def new_sym( constant: str | None, ) -> str: return ( - f"_Py_UOpsSymbolicValue_New(" + f"_Py_UOpsSymType_New(" f"ctx, {constant or 'NULL'});" ) @@ -331,11 +331,11 @@ def _write_body_abstract_interp_guard_uop( aux = "0" if aux is None else aux # Check that the input type information match (including auxiliary info) predicates.append( - f"sym_matches_type((_Py_UOpsSymbolicValue *){output_var.name}, {typname}, (uint32_t){aux})" + f"sym_matches_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" ) # Propagate mode - set the types propagates.append( - f"sym_set_type((_Py_UOpsSymbolicValue *){output_var.name}, {typname}, (uint32_t){aux})" + f"sym_set_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" ) out.emit("// Type guard elimination\n") @@ -417,7 +417,7 @@ def generate_tier2_abstract( if not uop.properties.always_exits: # Guards strictly only peek if not uop.properties.guard: - stack.flush(out, cast_type="_Py_UOpsSymbolicValue *") + stack.flush(out, cast_type="_Py_UOpsSymType *") out.emit("break;\n") out.start_line() out.emit("}") From 5868fb87f7692790579b9d7e6a70374cbb13f455 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 28 Jan 2024 12:03:31 +0800 Subject: [PATCH 067/111] rename confusing name --- Python/optimizer_analysis.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6e39d64cd9fbad..474437f782349e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -168,7 +168,7 @@ 
typedef struct _Py_UOpsAbstractInterpContext { _Py_UOpsSymType **water_level; _Py_UOpsSymType **limit; - _Py_UOpsSymType *localsplus[1]; + _Py_UOpsSymType *locals_and_stack[1]; } _Py_UOpsAbstractInterpContext; static void @@ -240,10 +240,10 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } - self->limit = self->localsplus + MAX_ABSTRACT_INTERP_SIZE; - self->water_level = self->localsplus; + self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; + self->water_level = self->locals_and_stack; for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { - self->localsplus[i] = NULL; + self->locals_and_stack[i] = NULL; } From 5a3f44f7009005a9a84fa70cafce2e2b74b37d29 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 28 Jan 2024 16:27:57 +0800 Subject: [PATCH 068/111] Address Guido's review, 4x function cache Co-Authored-By: Guido van Rossum --- Include/internal/pycore_function.h | 2 +- Python/optimizer_analysis.c | 35 +++++++++++++++--------------- Python/specialize.c | 6 +++-- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 3f3da8a44b77e4..b3f88befb5c540 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -16,7 +16,7 @@ extern PyObject* _PyFunction_Vectorcall( #define FUNC_MAX_WATCHERS 8 -#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ +#define FUNC_VERSION_CACHE_SIZE (1<<14) /* Must be a power of 2 */ struct _py_func_state { uint32_t next_version; // Borrowed references to function objects whose diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 474437f782349e..eccf592398db0a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -38,7 +38,7 @@ #ifdef Py_DEBUG static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; - static inline int get_lltrace() { + static inline int get_lltrace(void) { char *uop_debug 
= Py_GETENV(DEBUG_ENV); int lltrace = 0; if (uop_debug != NULL && *uop_debug >= '0') { @@ -231,8 +231,6 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } - - self = PyObject_NewVar(_Py_UOpsAbstractInterpContext, &_Py_UOpsAbstractInterpContext_Type, MAX_ABSTRACT_INTERP_SIZE); @@ -409,19 +407,19 @@ sym_type_get_refinement(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ); static inline PyFunctionObject * extract_func_from_sym(_Py_UOpsSymType *callable_sym) { - assert(callable_sym != NULL); - if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { - DPRINTF(1, "error: _PUSH_FRAME not function type\n"); - return NULL; - } - uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); - PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); - if (func == NULL) { - OPT_STAT_INC(optimizer_failure_reason_null_function); - DPRINTF(1, "error: _PUSH_FRAME cannot find func version\n"); - return NULL; - } - return func; + assert(callable_sym != NULL); + if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { + DPRINTF(1, "error: _PUSH_FRAME not function type\n"); + return NULL; + } + uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); + PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); + if (func == NULL) { + OPT_STAT_INC(optimizer_failure_reason_null_function); + DPRINTF(1, "error: _PUSH_FRAME cannot find func version\n"); + return NULL; + } + return func; } @@ -882,13 +880,14 @@ uop_abstract_interpret_single_inst( case _INIT_CALL_PY_EXACT_ARGS: { // Don't put in the new frame. Leave it be so that _PUSH_FRAME // can extract callable, self_or_null and args later. - // Set stack pointer to the callable. - stack_pointer += -1 - oparg; + // This also means our stack pointer diverges from the real VM. break; } case _PUSH_FRAME: { int argcount = oparg; + // _INIT_CALL_PY_EXACT_ARGS's real stack effect in the VM. 
+ stack_pointer += -1 - oparg; // TOS is the new callable, above it self_or_null and args PyFunctionObject *func = extract_func_from_sym(PEEK(1)); diff --git a/Python/specialize.c b/Python/specialize.c index 39b0ad5e881179..e2b5790793b096 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -242,8 +242,10 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts); fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes); - fprintf(out, "Optimization optimizer failure null function: %" PRIu64 "\n", stats->optimizer_failure_reason_null_function); - fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", stats->optimizer_failure_reason_no_memory); + fprintf(out, "Optimization optimizer failure null function: %" PRIu64 "\n", + stats->optimizer_failure_reason_null_function); + fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", + stats->optimizer_failure_reason_no_memory); const char* const* names; for (int i = 0; i < 512; i++) { From f450646ea2577df3ba7a020884f55d1ef62e40dc Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 28 Jan 2024 18:47:05 +0800 Subject: [PATCH 069/111] loop peel more often, add stats for loop peeling --- Include/cpython/pystats.h | 3 +++ Python/optimizer.c | 2 +- Python/optimizer_analysis.c | 16 +++++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 16c5804a62e72e..91cb2136cf05a5 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -124,6 +124,9 @@ typedef struct _optimization_stats { uint64_t optimizer_successes; uint64_t optimizer_failure_reason_null_function; uint64_t optimizer_failure_reason_no_memory; + uint64_t loop_body_duplication_attempts; + uint64_t loop_body_duplication_successes; + uint64_t 
loop_body_duplication_no_mem; } OptimizationStats; typedef struct _stats { diff --git a/Python/optimizer.c b/Python/optimizer.c index 09fa85bc0fecff..6c60fee914c088 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -19,7 +19,7 @@ // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 512 // This the above + additional working space we need. -#define UOP_MAX_TRACE_WORKING_LENGTH UOP_MAX_TRACE_LENGTH * 2 +#define UOP_MAX_TRACE_WORKING_LENGTH (UOP_MAX_TRACE_LENGTH * 2) #define MAX_EXECUTORS_SIZE 256 diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index eccf592398db0a..a261316dfe4cda 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -989,6 +989,7 @@ uop_abstract_interpret( _PyUOpInstruction *end = NULL; AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; bool first_impure = true; + bool has_enough_space_to_duplicate_loop = true; int res = 0; loop_peeling: @@ -1030,18 +1031,27 @@ uop_abstract_interpret( // If we end in a loop, and we have a lot of space left, peel the loop for added type stability // https://en.wikipedia.org/wiki/Loop_splitting - if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && - ((ctx->emitter.curr_i * 3) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer))) { + has_enough_space_to_duplicate_loop = ((ctx->emitter.curr_i * 2) < + (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer)); + if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && has_enough_space_to_duplicate_loop) { + OPT_STAT_INC(loop_body_duplication_attempts); did_loop_peel = true; _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx->emitter.curr_i), 0, 0}; if (emit_i(&ctx->emitter, jump_header) < 0) { goto error; } - DPRINTF(2, "loop_peeling!\n"); + DPRINTF(1, "loop_peeling!\n"); goto loop_peeling; } else { +#if defined(Py_STATS) || defined(Py_DEBUG) + if(!did_loop_peel && curr->opcode == _JUMP_TO_TOP && !has_enough_space_to_duplicate_loop) { + 
OPT_STAT_INC(loop_body_duplication_no_mem); + DPRINTF(1, "no space for loop peeling\n"); + } +#endif if (did_loop_peel) { + OPT_STAT_INC(loop_body_duplication_successes); assert(curr->opcode == _JUMP_TO_TOP); _PyUOpInstruction jump_abs = {_JUMP_ABSOLUTE, (ctx->emitter.curr_i), 0, 0}; if (emit_i(&ctx->emitter, jump_abs) < 0) { From d226882c86a9d354fc9823830497cf53aebb55b6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 28 Jan 2024 07:57:29 -0800 Subject: [PATCH 070/111] Add `(void)found` to silence compiler warning on L802 --- Python/optimizer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/optimizer.c b/Python/optimizer.c index 6c60fee914c088..42b4acd1e73400 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -800,6 +800,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) } } assert(found); + (void)found; } _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG From 9a3585aea53ad2f1abc1ed658b05d3ee9ad7c654 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:25:59 +0800 Subject: [PATCH 071/111] convert frame and context to non-PyObjects --- Include/internal/pycore_optimizer.h | 2 + Python/optimizer.c | 2 - Python/optimizer_analysis.c | 75 ++++++++++++++--------------- 3 files changed, 37 insertions(+), 42 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 31f30c673f207a..7c8a6f97d0b3e6 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,6 +8,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#define TRACE_STACK_SIZE 5 + int _Py_uop_analyze_and_optimize(PyCodeObject *code, _PyUOpInstruction *trace, int trace_len, int curr_stackentries); diff --git a/Python/optimizer.c b/Python/optimizer.c index 42b4acd1e73400..8f7e523fde87d0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -327,8 +327,6 @@ 
BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP, }; -#define TRACE_STACK_SIZE 5 - #define CONFIDENCE_RANGE 1000 #define CONFIDENCE_CUTOFF 333 diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a261316dfe4cda..747799954c6bcf 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -36,6 +36,9 @@ #define PEEPHOLE_MAX_ATTEMPTS 5 +// +1 to account for implicit root frame. +#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 1) + #ifdef Py_DEBUG static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; static inline int get_lltrace(void) { @@ -102,7 +105,6 @@ typedef struct { typedef struct _Py_UOpsAbstractFrame { - PyObject_HEAD // Strong reference. struct _Py_UOpsAbstractFrame *prev; // Borrowed reference. @@ -122,20 +124,13 @@ typedef struct _Py_UOpsAbstractFrame { static void abstractframe_dealloc(_Py_UOpsAbstractFrame *self) { + if (self == NULL) { + return; + } PyMem_Free(self->sym_consts); - Py_XDECREF(self->prev); - Py_TYPE(self)->tp_free((PyObject *)self); + abstractframe_dealloc(self->prev); } -PyTypeObject _Py_UOpsAbstractFrame_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uops abstract frame", - .tp_basicsize = sizeof(_Py_UOpsAbstractFrame) , - .tp_itemsize = 0, - .tp_dealloc = (destructor)abstractframe_dealloc, - .tp_free = PyObject_Free, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION -}; typedef struct ty_arena { int ty_curr_number; @@ -158,6 +153,9 @@ typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD // The current "executing" frame. _Py_UOpsAbstractFrame *frame; + // Need one more for the root frame. + _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; + int curr_frame_depth; // Arena for the symbolic types. 
ty_arena t_arena; @@ -168,14 +166,16 @@ typedef struct _Py_UOpsAbstractInterpContext { _Py_UOpsSymType **water_level; _Py_UOpsSymType **limit; - _Py_UOpsSymType *locals_and_stack[1]; + _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; } _Py_UOpsAbstractInterpContext; static void -abstractinterp_dealloc(PyObject *o) +abstractinterp_dealloc(_Py_UOpsAbstractInterpContext *self) { - _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; - Py_XDECREF(self->frame); + if (self == NULL) { + return; + } + abstractframe_dealloc(self->frame); if (self->t_arena.arena != NULL) { int tys = self->t_arena.ty_curr_number; for (int i = 0; i < tys; i++) { @@ -183,18 +183,9 @@ abstractinterp_dealloc(PyObject *o) } } PyMem_Free(self->t_arena.arena); - Py_TYPE(self)->tp_free((PyObject *)self); + PyMem_Free(self); } -PyTypeObject _Py_UOpsAbstractInterpContext_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uops abstract interpreter's context", - .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext) - sizeof(_Py_UOpsSymType *), - .tp_itemsize = sizeof(_Py_UOpsSymType *), - .tp_dealloc = (destructor)abstractinterp_dealloc, - .tp_free = PyObject_Free, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION -}; static inline _Py_UOpsAbstractFrame * frame_new(_Py_UOpsAbstractInterpContext *ctx, @@ -231,9 +222,7 @@ abstractinterp_context_new(PyCodeObject *co, goto error; } - self = PyObject_NewVar(_Py_UOpsAbstractInterpContext, - &_Py_UOpsAbstractInterpContext_Type, - MAX_ABSTRACT_INTERP_SIZE); + self = PyMem_New(_Py_UOpsAbstractInterpContext, 1); if (self == NULL) { goto error; } @@ -252,6 +241,7 @@ abstractinterp_context_new(PyCodeObject *co, // Frame setup + self->curr_frame_depth = 0; frame = frame_new(self, co->co_consts, stack_len, locals_len, curr_stacklen); if (frame == NULL) { goto error; @@ -284,8 +274,8 @@ abstractinterp_context_new(PyCodeObject *co, self->t_arena.arena = NULL; self->frame = NULL; } - Py_XDECREF(self); - 
Py_XDECREF(frame); + abstractinterp_dealloc(self); + abstractframe_dealloc(frame); return NULL; } @@ -381,8 +371,9 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, if (sym_consts == NULL) { return NULL; } - _Py_UOpsAbstractFrame *frame = PyObject_New(_Py_UOpsAbstractFrame, - &_Py_UOpsAbstractFrame_Type); + _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; + ctx->curr_frame_depth++; + assert(ctx->curr_frame_depth <= MAX_ABSTRACT_FRAME_DEPTH); if (frame == NULL) { PyMem_Free(sym_consts); return NULL; @@ -394,7 +385,6 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, frame->stack_len = stack_len; frame->locals_len = locals_len; frame->prev = NULL; - frame->next = NULL; return frame; } @@ -447,7 +437,6 @@ ctx_frame_push( } frame->prev = ctx->frame; - ctx->frame->next = frame; ctx->frame = frame; @@ -465,8 +454,8 @@ ctx_frame_pop( frame->prev = NULL; ctx->water_level = frame->locals; - Py_DECREF(frame); - ctx->frame->next = NULL; + PyMem_Free(frame->sym_consts); + ctx->curr_frame_depth--; return 0; } @@ -881,10 +870,15 @@ uop_abstract_interpret_single_inst( // Don't put in the new frame. Leave it be so that _PUSH_FRAME // can extract callable, self_or_null and args later. // This also means our stack pointer diverges from the real VM. + + // IMPORTANT: make sure there is no interference + // between this and _PUSH_FRAME. That is a required invariant. break; } case _PUSH_FRAME: { + // From _INIT_CALL_PY_EXACT_ARGS + int argcount = oparg; // _INIT_CALL_PY_EXACT_ARGS's real stack effect in the VM. 
stack_pointer += -1 - oparg; @@ -900,9 +894,10 @@ uop_abstract_interpret_single_inst( assert(self_or_null != NULL); _Py_UOpsSymType **args = &PEEK(-1); assert(args != NULL); - // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS if (!sym_is_type(self_or_null, NULL_TYPE) && !sym_is_type(self_or_null, SELF_OR_NULL)) { + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in + // VM args--; argcount++; } @@ -1066,12 +1061,12 @@ uop_abstract_interpret( res = ctx->emitter.curr_i; - Py_DECREF(ctx); + abstractinterp_dealloc(ctx); return res; error: - Py_XDECREF(ctx); + abstractinterp_dealloc(ctx); return -1; } From 882b48c065f3fa5d3b3756aa6926f9ad9765f154 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:40:45 +0800 Subject: [PATCH 072/111] add test for freed functions --- Lib/test/test_capi/test_opt.py | 26 ++++++++++++++++++++++++++ Python/optimizer_analysis.c | 6 +++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index d974d1fab219d5..38a4f2a4c97bb2 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2,6 +2,7 @@ import opcode import textwrap import unittest +import gc import _testinternalcapi @@ -870,6 +871,31 @@ def testfunc(loops): iter_next_count = [opname for opname, _, _ in ex if opname == "_ITER_NEXT_RANGE"] self.assertGreaterEqual(len(iter_next_count), 2) + def test_call_py_exact_args_disappearing(self): + def dummy(x): + return x+1 + + def testfunc(n): + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + # Trigger specialization + testfunc(8) + with temporary_optimizer(opt): + del dummy + gc.collect() + + def dummy(x): + return x + 2 + testfunc(10) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_CHECK_PEP_523", uops) + def 
test_truncated_zipfile(self): import io import zipfile diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 747799954c6bcf..a9fe08b504f106 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -36,8 +36,8 @@ #define PEEPHOLE_MAX_ATTEMPTS 5 -// +1 to account for implicit root frame. -#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 1) +// + buffer to account for implicit root frame. +#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) #ifdef Py_DEBUG static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; @@ -371,9 +371,9 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, if (sym_consts == NULL) { return NULL; } + assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; ctx->curr_frame_depth++; - assert(ctx->curr_frame_depth <= MAX_ABSTRACT_FRAME_DEPTH); if (frame == NULL) { PyMem_Free(sym_consts); return NULL; From 551466f6d17f4e2f5460f60a599a7eff31c15634 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:15:52 +0800 Subject: [PATCH 073/111] cleanup --- Python/optimizer_analysis.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a9fe08b504f106..142458d1abfb45 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -153,7 +153,6 @@ typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD // The current "executing" frame. _Py_UOpsAbstractFrame *frame; - // Need one more for the root frame. 
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; int curr_frame_depth; @@ -213,11 +212,10 @@ abstractinterp_context_new(PyCodeObject *co, int stack_len = co->co_stacksize; _Py_UOpsAbstractFrame *frame = NULL; _Py_UOpsAbstractInterpContext *self = NULL; - char *arena = NULL; _Py_UOpsSymType *t_arena = NULL; - Py_ssize_t ty_arena_size = (sizeof(_Py_UOpsSymType)) * ir_entries * OVERALLOCATE_FACTOR; + int ty_arena_size = ir_entries * OVERALLOCATE_FACTOR; - t_arena = (_Py_UOpsSymType *)PyMem_Malloc(ty_arena_size); + t_arena = (_Py_UOpsSymType *)PyMem_New(_Py_UOpsSymType, ty_arena_size); if (t_arena == NULL) { goto error; } @@ -237,7 +235,7 @@ abstractinterp_context_new(PyCodeObject *co, // Setup the arena for sym expressions. self->t_arena.ty_curr_number = 0; self->t_arena.arena = t_arena; - self->t_arena.ty_max_number = ir_entries * OVERALLOCATE_FACTOR; + self->t_arena.ty_max_number = ty_arena_size; // Frame setup @@ -267,7 +265,6 @@ abstractinterp_context_new(PyCodeObject *co, return self; error: - PyMem_Free(arena); PyMem_Free(t_arena); if (self != NULL) { // Important so we don't double free them. 
@@ -374,10 +371,6 @@ frame_new(_Py_UOpsAbstractInterpContext *ctx, assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; ctx->curr_frame_depth++; - if (frame == NULL) { - PyMem_Free(sym_consts); - return NULL; - } frame->sym_consts = sym_consts; From 17a989ce0d8a140e57249516a89358bc07ef35f7 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:31:22 +0800 Subject: [PATCH 074/111] general cleanups --- Include/cpython/pystats.h | 1 + Python/optimizer_analysis.c | 59 +++++++++++++++++++------------------ Python/specialize.c | 8 +++++ 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 91cb2136cf05a5..c440f033f9f576 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -124,6 +124,7 @@ typedef struct _optimization_stats { uint64_t optimizer_successes; uint64_t optimizer_failure_reason_null_function; uint64_t optimizer_failure_reason_no_memory; + uint64_t optimizer_failure_reason_no_writebuffer; uint64_t loop_body_duplication_attempts; uint64_t loop_body_duplication_successes; uint64_t loop_body_duplication_no_mem; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 142458d1abfb45..f3a2d4652851bc 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -32,9 +32,9 @@ #define MAX_ABSTRACT_INTERP_SIZE 2048 -#define OVERALLOCATE_FACTOR 3 +#define OVERALLOCATE_FACTOR 5 -#define PEEPHOLE_MAX_ATTEMPTS 5 +#define PEEPHOLE_MAX_ATTEMPTS 3 // + buffer to account for implicit root frame. #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) @@ -107,8 +107,6 @@ typedef struct { typedef struct _Py_UOpsAbstractFrame { // Strong reference. struct _Py_UOpsAbstractFrame *prev; - // Borrowed reference. 
- struct _Py_UOpsAbstractFrame *next; // Symbolic version of co_consts int sym_consts_len; _Py_UOpsSymType **sym_consts; @@ -288,7 +286,7 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) return NULL; } for (Py_ssize_t i = 0; i < co_const_len; i++) { - _Py_UOpsSymType *res = sym_init_const(ctx, Py_NewRef(PyTuple_GET_ITEM(co_consts, i)), (int)i); + _Py_UOpsSymType *res = sym_init_const(ctx, PyTuple_GET_ITEM(co_consts, i), (int)i); if (res == NULL) { goto error; } @@ -470,11 +468,10 @@ _Py_UOpsSymType_New(_Py_UOpsAbstractInterpContext *ctx, self->const_val = NULL; self->types = 0; - self->const_val = NULL; - self->types = 0; - if (const_val != NULL) { + Py_INCREF(const_val); sym_set_type_from_const(self, const_val); + self->const_val = const_val; } return self; @@ -496,7 +493,7 @@ sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym) { to_sym->types = (from_sym->types & IMMUTABLES); if (to_sym->types) { - to_sym->const_val = Py_XNewRef(from_sym->const_val); + Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val)); } } @@ -505,7 +502,6 @@ static void sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) { PyTypeObject *tp = Py_TYPE(obj); - sym->const_val = obj; if (tp == &PyLong_Type) { sym_set_type(sym, PYLONG_TYPE, 0); @@ -546,7 +542,7 @@ sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx) return _Py_UOpsSymType_New(ctx,NULL); } -// Steals a reference to const_val +// Takes a borrowed reference to const_val. 
static inline _Py_UOpsSymType* sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx) { @@ -671,11 +667,13 @@ emit_i(uops_emitter *emitter, _PyUOpInstruction inst) { if (emitter->curr_i < 0) { - DPRINTF(2, "out of emission space\n"); + OPT_STAT_INC(optimizer_failure_reason_no_writebuffer); + DPRINTF(1, "out of emission space\n"); return -1; } if (emitter->writebuffer + emitter->curr_i >= emitter->writebuffer_end) { - DPRINTF(2, "out of emission space\n"); + OPT_STAT_INC(optimizer_failure_reason_no_writebuffer); + DPRINTF(1, "out of emission space\n"); return -1; } if (inst.opcode == _NOP) { @@ -1017,9 +1015,10 @@ uop_abstract_interpret( assert(op_is_end(curr->opcode)); - // If we end in a loop, and we have a lot of space left, peel the loop for added type stability + // If we end in a loop, and we have a lot of space left, peel the loop for + // poor man's loop invariant code motino for guards // https://en.wikipedia.org/wiki/Loop_splitting - has_enough_space_to_duplicate_loop = ((ctx->emitter.curr_i * 2) < + has_enough_space_to_duplicate_loop = ((ctx->emitter.curr_i * 3) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer)); if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && has_enough_space_to_duplicate_loop) { OPT_STAT_INC(loop_body_duplication_attempts); @@ -1154,7 +1153,8 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) load_count += op_is_load(back->opcode); if (back->opcode == _LOAD_CONST_INLINE) { PyObject *const_val = (PyObject *)back->operand; - Py_CLEAR(const_val); + Py_DECREF(const_val); + back->operand = (uintptr_t)NULL; } back->opcode = NOP; back--; @@ -1176,6 +1176,18 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) return done; } +void +infallible_optimizations(_PyUOpInstruction *buffer, int buffer_size) +{ + bool done = false; + for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && + !done; + peephole_attempts++) { + done = 
peephole_optimizations(buffer, buffer_size); + } + remove_unneeded_uops(buffer, buffer_size); +} + int _Py_uop_analyze_and_optimize( @@ -1204,13 +1216,7 @@ _Py_uop_analyze_and_optimize( goto error; } - for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && - !peephole_optimizations(temp_writebuffer, new_trace_len); - peephole_attempts++) { - - } - - remove_unneeded_uops(temp_writebuffer, new_trace_len); + infallible_optimizations(temp_writebuffer, new_trace_len); // Fill in our new trace! memcpy(buffer, temp_writebuffer, new_trace_len * sizeof(_PyUOpInstruction)); @@ -1229,16 +1235,11 @@ _Py_uop_analyze_and_optimize( OPT_STAT_INC(optimizer_successes); return 0; error: + infallible_optimizations(buffer, buffer_size); // The only valid error we can raise is MemoryError. // Other times it's not really errors but things like not being able // to fetch a function version because the function got deleted. err_occurred = PyErr_Occurred(); PyMem_Free(temp_writebuffer); - for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && - !done; - peephole_attempts++) { - done = peephole_optimizations(buffer, buffer_size); - } - remove_unneeded_uops(buffer, buffer_size); return err_occurred ? 
-1 : 0; } \ No newline at end of file diff --git a/Python/specialize.c b/Python/specialize.c index e2b5790793b096..c0bd7c0e3546cd 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -246,6 +246,14 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) stats->optimizer_failure_reason_null_function); fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", stats->optimizer_failure_reason_no_memory); + fprintf(out, "Optimization optimizer failure no writebuffer left: %" PRIu64 "\n", + stats->optimizer_failure_reason_no_writebuffer); + fprintf(out, "Optimization optimizer loop duplication attempts: %" PRIu64 "\n", + stats->loop_body_duplication_attempts); + fprintf(out, "Optimization optimizer loop duplication successes: %" PRIu64 "\n", + stats->loop_body_duplication_successes); + fprintf(out, "Optimization optimizer loop duplication no memory: %" PRIu64 "\n", + stats->loop_body_duplication_no_mem); const char* const* names; for (int i = 0; i < 512; i++) { From fcdc84cf8f8ae2251fd05b2cd0b43f1772b4471d Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:39:37 +0800 Subject: [PATCH 075/111] make static --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index f3a2d4652851bc..5e86c3a22dbcd8 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1176,7 +1176,7 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) return done; } -void +static void infallible_optimizations(_PyUOpInstruction *buffer, int buffer_size) { bool done = false; From 065b8a43221ec0856164ad6f322afe4ae97e32f6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 30 Jan 2024 08:59:58 +0800 Subject: [PATCH 076/111] add comment by Guido Co-Authored-By: Guido van Rossum --- Python/optimizer_analysis.c | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5e86c3a22dbcd8..1cbebd38b7528f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -36,7 +36,7 @@ #define PEEPHOLE_MAX_ATTEMPTS 3 -// + buffer to account for implicit root frame. +// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) #ifdef Py_DEBUG From 580dd14fd0f7a1b45dd27eca1f2b237c9b2ccc73 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 28 Jan 2024 21:36:42 +0800 Subject: [PATCH 077/111] fix _JUMP_ABSOLUTE --- Python/jit.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Python/jit.c b/Python/jit.c index 22949c082da05a..909a687f48910e 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -298,6 +298,18 @@ emit(const StencilGroup *group, uint64_t patches[]) copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches); } +static size_t +calculate_jump_abs_offset(_PyUOpInstruction *trace, _PyUOpInstruction *jump_absolute) +{ + assert(jump_absolute->opcode == _JUMP_ABSOLUTE); + size_t total = 0; + bool found = false; + for (int i = 0; i < jump_absolute->oparg; i++) { + total += stencil_groups[trace[i].opcode].code.body_size; + } + return total; +} + // Compiles executor in-place. Don't forget to call _PyJIT_Free later! 
int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length) @@ -329,7 +341,13 @@ _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t len // Think of patches as a dictionary mapping HoleValue to uint64_t: uint64_t patches[] = GET_PATCHES(); patches[HoleValue_CODE] = (uint64_t)code; - patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size; + if (instruction->opcode == _JUMP_ABSOLUTE) { + assert(i + 1 == length); + patches[HoleValue_CONTINUE] = (uint64_t)memory + calculate_jump_abs_offset(trace, instruction); + } + else { + patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size; + }; patches[HoleValue_DATA] = (uint64_t)data; patches[HoleValue_EXECUTOR] = (uint64_t)executor; patches[HoleValue_OPARG] = instruction->oparg; From d603792e1b34d0dddf5ca6eb55d17a508844edba Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 30 Jan 2024 09:22:20 +0800 Subject: [PATCH 078/111] remove unsued var --- Python/jit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/jit.c b/Python/jit.c index 909a687f48910e..308bde86d063df 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -303,7 +303,6 @@ calculate_jump_abs_offset(_PyUOpInstruction *trace, _PyUOpInstruction *jump_abso { assert(jump_absolute->opcode == _JUMP_ABSOLUTE); size_t total = 0; - bool found = false; for (int i = 0; i < jump_absolute->oparg; i++) { total += stencil_groups[trace[i].opcode].code.body_size; } From f206bd066ce49d327f3b214ff7ef6e5c05e08a93 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 30 Jan 2024 09:53:34 +0800 Subject: [PATCH 079/111] use iterative instead of recursive --- Python/optimizer_analysis.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 1cbebd38b7528f..44a615d860cf34 100644 --- a/Python/optimizer_analysis.c +++ 
b/Python/optimizer_analysis.c @@ -122,11 +122,11 @@ typedef struct _Py_UOpsAbstractFrame { static void abstractframe_dealloc(_Py_UOpsAbstractFrame *self) { - if (self == NULL) { - return; + _Py_UOpsAbstractFrame *curr = self; + while (curr != NULL) { + PyMem_Free(curr->sym_consts); + curr = curr->prev; } - PyMem_Free(self->sym_consts); - abstractframe_dealloc(self->prev); } From 17b4ae3cb509a9e209bea944fdba8a8f7eeb5eeb Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:25:05 +0800 Subject: [PATCH 080/111] fix bad test on aarch64 linux --- Lib/test/test_capi/test_opt.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 38a4f2a4c97bb2..1bd162d6ddb54b 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -897,9 +897,12 @@ def dummy(x): self.assertNotIn("_CHECK_PEP_523", uops) def test_truncated_zipfile(self): - import io - import zipfile - from random import random + try: + import io + import zipfile + from random import random + except ImportError: + self.skipTest("Cannot import") opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): FIXEDTEST_SIZE = 1000 From 913d95bc06575df8b67d36ffb96d35e477ab69f4 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 30 Jan 2024 13:25:18 +0800 Subject: [PATCH 081/111] remove non-compliant test --- Lib/test/test_capi/test_opt.py | 45 ---------------------------------- 1 file changed, 45 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 1bd162d6ddb54b..9ab2f64b061744 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -896,51 +896,6 @@ def dummy(x): self.assertIn("_PUSH_FRAME", uops) self.assertNotIn("_CHECK_PEP_523", uops) - def test_truncated_zipfile(self): - try: - import io - import zipfile - from 
random import random - except ImportError: - self.skipTest("Cannot import") - opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - FIXEDTEST_SIZE = 1000 - line_gen = [bytes("Zipfile test line %d. random float: %f\n" % - (i, random()), "ascii") - for i in range(FIXEDTEST_SIZE)] - - data = b''.join(line_gen) - compression = zipfile.ZIP_DEFLATED - fp = io.BytesIO() - with zipfile.ZipFile(fp, mode='w') as zipf: - zipf.writestr('strfile', data, compress_type=compression) - end_offset = fp.tell() - zipfiledata = fp.getvalue() - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - zipopen.read() - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - while zipopen.read(100): - pass - - fp = io.BytesIO(zipfiledata) - with zipfile.ZipFile(fp) as zipf: - with zipf.open('strfile') as zipopen: - fp.truncate(end_offset - 20) - with self.assertRaises(EOFError): - while zipopen.read1(100): - pass - if __name__ == "__main__": From 425b40d1eb594e716cddb11d373fc46879eb1cd6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 30 Jan 2024 22:11:17 +0800 Subject: [PATCH 082/111] fix compiler warnings --- Python/abstract_interp_cases.c.h | 7 +++-- Python/optimizer_analysis.c | 1 - .../tier2_abstract_generator.py | 29 +++++++++---------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 75b9e0350aaf80..b17e66a7d8164b 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -21,6 +21,7 @@ case _POP_TOP: { _Py_UOpsSymType *__value_; __value_ = stack_pointer[-1]; + (void)__value_; stack_pointer += -1; break; } @@ -30,8 +31,8 @@ _Py_UOpsSymType *__receiver_; 
__value_ = stack_pointer[-1]; __receiver_ = stack_pointer[-2]; - __value_ = _Py_UOpsSymType_New(ctx, NULL); - if (__value_ == NULL) { goto error; } + (void)__receiver_; + (void)__value_; stack_pointer[-2] = __value_; stack_pointer += -1; break; @@ -1948,6 +1949,8 @@ _Py_UOpsSymType *__bottom_; __top_ = stack_pointer[-1]; __bottom_ = stack_pointer[-2 - (oparg-2)]; + (void)__bottom_; + (void)__top_; stack_pointer[-2 - (oparg-2)] = __top_; stack_pointer[-1] = __bottom_; break; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 44a615d860cf34..370936bb7ba42a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1200,7 +1200,6 @@ _Py_uop_analyze_and_optimize( OPT_STAT_INC(optimizer_attempts); _PyUOpInstruction *temp_writebuffer = NULL; bool err_occurred = false; - bool done = false; temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size); if (temp_writebuffer == NULL) { diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 33f0025d7ae277..8bd7bad883a726 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -45,6 +45,9 @@ "_CHECK_STACK_SPACE", "_INIT_CALL_PY_EXACT_ARGS", "_END_SEND", + "_POP_TOP", + "_NOP", + "_SWAP", } @@ -206,18 +209,15 @@ def _write_body_abstract_interp_pure_uop( mangled_uop.stack.inputs ) - if uop.name in {"_NOP", "_SWAP", "_POP_TOP"}: + # uop is mandatory - we cannot const evaluate it + if uop.name in NO_CONST_OR_TYPE_EVALUATE: + for in_ in mangled_uop.stack.inputs: + out.emit(f"(void){in_.name};\n") return assert ( len(uop.stack.outputs) == 1 ), f"Currently we only support 1 stack output for pure ops: {uop}" - # uop is mandatory - we cannot const evaluate it - sym = new_sym(None) - if uop.name in NO_CONST_OR_TYPE_EVALUATE: - out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") - out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; 
}}\n") - return # Constant prop only handles one output, and no variadic inputs. # Perhaps in the future we can support these. @@ -246,15 +246,12 @@ def _write_body_abstract_interp_pure_uop( out.emit(f" if (emit_const(&ctx->emitter, {const_val}, shrink_stack) < 0) {{ goto error; }}\n") out.emit("new_inst.opcode = _NOP;\n") out.emit("}\n") - out.emit("else {\n") - sym = new_sym(None) - out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") - out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") - out.emit("}\n") - else: - sym = new_sym(None) - out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") - out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") + if not mangled_uop.stack.outputs[0].peek: + out.emit("else {\n") + sym = new_sym(None) + out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") + out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") + out.emit("}\n") out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) goto error;\n") From 2c884e2119553fb252c285d0c2be3744c67df0d8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:02:35 +0800 Subject: [PATCH 083/111] low hanging fruit in Guido's review Co-Authored-By: Guido van Rossum --- Python/abstract_interp_cases.c.h | 34 +++------ Python/bytecodes.c | 2 +- Python/optimizer_analysis.c | 71 +++++-------------- .../tier2_abstract_generator.py | 4 +- 4 files changed, 30 insertions(+), 81 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index b17e66a7d8164b..2e599d2b9f61b4 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -59,8 +59,7 @@ res = Py_IsFalse(value) ? 
Py_True : Py_False; __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 1; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 1) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -195,8 +194,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 2; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -230,8 +228,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 2; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -265,8 +262,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 2; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -333,8 +329,7 @@ DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 2; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -369,8 +364,7 @@ DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 2; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) 
{ goto error; } new_inst.opcode = _NOP; } else { @@ -405,8 +399,7 @@ DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 2; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -472,8 +465,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - shrink_stack.oparg = 2; - if (emit_const(&ctx->emitter, (PyObject *)res, shrink_stack) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -1540,16 +1532,6 @@ new_inst.opcode = _NOP; break; } - // Type guard elimination - if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0)) { - DPRINTF(2, "type propagation eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } - else { - // Type propagation - sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, (uint32_t)0); - } break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9036e32344f2b0..1327aca01bec09 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2880,7 +2880,7 @@ dummy_func( exc_info->exc_value = Py_NewRef(new_exc); } - op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner: &GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE)) { + op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 370936bb7ba42a..d98560d01dcb22 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -34,8 
+34,6 @@ #define OVERALLOCATE_FACTOR 5 -#define PEEPHOLE_MAX_ATTEMPTS 3 - // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) @@ -73,9 +71,8 @@ typedef enum { NULL_TYPE = 6, PYMETHOD_TYPE = 7, GUARD_DORV_VALUES_TYPE = 8, - GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE = 9, // Can't statically determine if self or null. - SELF_OR_NULL = 10, + SELF_OR_NULL = 9, // Represents something from LOAD_CONST which is truly constant. TRUE_CONST = 30, @@ -105,7 +102,6 @@ typedef struct { typedef struct _Py_UOpsAbstractFrame { - // Strong reference. struct _Py_UOpsAbstractFrame *prev; // Symbolic version of co_consts int sym_consts_len; @@ -497,7 +493,6 @@ sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym) } } -// Steals a reference to obj static void sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) { @@ -513,26 +508,6 @@ sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) sym_set_type(sym, PYUNICODE_TYPE, 0); } - if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(obj); - - if (_PyDictOrValues_IsValues(*dorv) || - _PyObject_MakeInstanceAttributesFromDict(obj, dorv)) { - sym_set_type(sym, GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE, 0); - - PyTypeObject *owner_cls = tp; - PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; - sym_set_type( - sym, - GUARD_KEYS_VERSION_TYPE, - owner_heap_type->ht_cached_keys->dk_version - ); - } - - if (!_PyDictOrValues_IsValues(*dorv)) { - sym_set_type(sym, GUARD_DORV_VALUES_TYPE, 0); - } - } } @@ -606,7 +581,8 @@ sym_type_get_refinement(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ) static inline bool op_is_end(uint32_t opcode) { - return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP; + return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP || + opcode == _JUMP_ABSOLUTE; } static inline bool @@ -666,11 +642,6 @@ static inline int emit_i(uops_emitter 
*emitter, _PyUOpInstruction inst) { - if (emitter->curr_i < 0) { - OPT_STAT_INC(optimizer_failure_reason_no_writebuffer); - DPRINTF(1, "out of emission space\n"); - return -1; - } if (emitter->writebuffer + emitter->curr_i >= emitter->writebuffer_end) { OPT_STAT_INC(optimizer_failure_reason_no_writebuffer); DPRINTF(1, "out of emission space\n"); @@ -681,7 +652,7 @@ emit_i(uops_emitter *emitter, } DPRINTF(2, "Emitting instruction at [%d] op: %s, oparg: %d, target: %d, operand: %" PRIu64 " \n", emitter->curr_i, - (inst.opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[inst.opcode], + _PyOpcode_uop_name[inst.opcode], inst.oparg, inst.target, inst.operand); @@ -693,8 +664,9 @@ emit_i(uops_emitter *emitter, static inline int emit_const(uops_emitter *emitter, PyObject *const_val, - _PyUOpInstruction shrink_stack) + int num_pops) { + _PyUOpInstruction shrink_stack = {_SHRINK_STACK, num_pops, 0, 0}; if (emit_i(emitter, shrink_stack) < 0) { return -1; } @@ -783,11 +755,9 @@ uop_abstract_interpret_single_inst( _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer; _PyUOpInstruction new_inst = *inst; - _PyUOpInstruction shrink_stack = {_SHRINK_STACK, 0, 0, 0}; - DPRINTF(3, "Abstract interpreting %s:%d ", - (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode], + _PyOpcode_uop_name[opcode], oparg); switch (opcode) { #include "abstract_interp_cases.c.h" @@ -974,14 +944,14 @@ uop_abstract_interpret( _PyUOpInstruction *curr = NULL; _PyUOpInstruction *end = NULL; AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; - bool first_impure = true; + bool needs_clear_locals = true; bool has_enough_space_to_duplicate_loop = true; int res = 0; loop_peeling: curr = trace; end = trace + trace_len; - first_impure = true; + needs_clear_locals = true; ; while (curr < end && !op_is_end(curr->opcode)) { @@ -989,16 +959,16 @@ uop_abstract_interpret( !op_is_specially_handled(curr->opcode) && !op_is_bookkeeping(curr->opcode) && !op_is_guard(curr->opcode)) { - DPRINTF(3, "Impure %s\n", (curr->opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[curr->opcode]); - if (first_impure) { + DPRINTF(3, "Impure %s\n", _PyOpcode_uop_name[curr->opcode]); + if (needs_clear_locals) { if (clear_locals_type_info(ctx) < 0) { goto error; } } - first_impure = false; + needs_clear_locals = false; } else { - first_impure = true; + needs_clear_locals = true; } @@ -1111,6 +1081,7 @@ op_is_zappable(int opcode) case _LOAD_CONST: case _LOAD_FAST: case _LOAD_CONST_INLINE_BORROW: + case _NOP: return true; default: return false; @@ -1139,8 +1110,9 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) // then we can safely eliminate that without side effects. 
int load_count = 0; _PyUOpInstruction *back = curr-1; - while(op_is_zappable(back->opcode) && - load_count < oparg) { + while (back >= buffer && + load_count < oparg && + op_is_zappable(back->opcode)) { load_count += op_is_load(back->opcode); back--; } @@ -1179,12 +1151,7 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) static void infallible_optimizations(_PyUOpInstruction *buffer, int buffer_size) { - bool done = false; - for (int peephole_attempts = 0; peephole_attempts < PEEPHOLE_MAX_ATTEMPTS && - !done; - peephole_attempts++) { - done = peephole_optimizations(buffer, buffer_size); - } + peephole_optimizations(buffer, buffer_size); remove_unneeded_uops(buffer, buffer_size); } @@ -1241,4 +1208,4 @@ _Py_uop_analyze_and_optimize( err_occurred = PyErr_Occurred(); PyMem_Free(temp_writebuffer); return err_occurred ? -1 : 0; -} \ No newline at end of file +} diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 8bd7bad883a726..669d6aa882a168 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -242,8 +242,8 @@ def _write_body_abstract_interp_pure_uop( maybe_const_val = new_sym(const_val) out.emit(f"{mangled_uop.stack.outputs[0].name} = {maybe_const_val}\n") out.emit(f"if({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") - out.emit(f"shrink_stack.oparg = {len(uop.stack.inputs)};\n") - out.emit(f" if (emit_const(&ctx->emitter, {const_val}, shrink_stack) < 0) {{ goto error; }}\n") + out.emit(f" if (emit_const(&ctx->emitter, {const_val}, " + f"{len(uop.stack.inputs)}) < 0) {{ goto error; }}\n") out.emit("new_inst.opcode = _NOP;\n") out.emit("}\n") if not mangled_uop.stack.outputs[0].peek: From d7d8e8cc47ccf8659ed2014053854ed0b9b78223 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 31 Jan 2024 11:09:02 +0800 Subject: [PATCH 084/111] cleanup more --- 
Python/optimizer_analysis.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index d98560d01dcb22..251dc92133ac2e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1052,7 +1052,9 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = NOP; } } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + else if (opcode == _JUMP_TO_TOP || + opcode == _EXIT_TRACE || + opcode == _JUMP_ABSOLUTE) { break; } else { @@ -1097,10 +1099,9 @@ op_is_load(int opcode) opcode == _LOAD_CONST_INLINE_BORROW); } -static int +static void peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) { - bool done = true; for (int i = 0; i < buffer_size; i++) { _PyUOpInstruction *curr = buffer + i; int oparg = curr->oparg; @@ -1117,7 +1118,6 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) back--; } if (load_count == oparg) { - done = false; curr->opcode = NOP; back = curr-1; load_count = 0; @@ -1145,7 +1145,6 @@ peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) break; } } - return done; } static void @@ -1189,15 +1188,6 @@ _Py_uop_analyze_and_optimize( PyMem_Free(temp_writebuffer); - // _NOP out the rest of the buffer. 
- - // Fill up the rest of the buffer with NOPs - _PyUOpInstruction *after = buffer + new_trace_len + 1; - while (after < (buffer + buffer_size)) { - after->opcode = _NOP; - after++; - } - OPT_STAT_INC(optimizer_successes); return 0; error: From 47ee732cc75f8fac8320a1cd642c0a29a8887a10 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 31 Jan 2024 12:18:45 +0800 Subject: [PATCH 085/111] Remove abstractframe_dealloc, and frame->prev --- Python/optimizer_analysis.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 251dc92133ac2e..ba2ad0921fecbd 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -102,7 +102,6 @@ typedef struct { typedef struct _Py_UOpsAbstractFrame { - struct _Py_UOpsAbstractFrame *prev; // Symbolic version of co_consts int sym_consts_len; _Py_UOpsSymType **sym_consts; @@ -115,16 +114,6 @@ typedef struct _Py_UOpsAbstractFrame { _Py_UOpsSymType **locals; } _Py_UOpsAbstractFrame; -static void -abstractframe_dealloc(_Py_UOpsAbstractFrame *self) -{ - _Py_UOpsAbstractFrame *curr = self; - while (curr != NULL) { - PyMem_Free(curr->sym_consts); - curr = curr->prev; - } -} - typedef struct ty_arena { int ty_curr_number; @@ -168,7 +157,12 @@ abstractinterp_dealloc(_Py_UOpsAbstractInterpContext *self) if (self == NULL) { return; } - abstractframe_dealloc(self->frame); + int curr = self->curr_frame_depth - 1; + while (curr >= 0) { + PyMem_Free(self->frames[curr].sym_consts); + curr--; + } + self->curr_frame_depth = 0; if (self->t_arena.arena != NULL) { int tys = self->t_arena.ty_curr_number; for (int i = 0; i < tys; i++) { @@ -266,7 +260,7 @@ abstractinterp_context_new(PyCodeObject *co, self->frame = NULL; } abstractinterp_dealloc(self); - abstractframe_dealloc(frame); + PyMem_Free(frame->sym_consts); return NULL; } @@ -371,7 +365,6 @@ frame_new(_Py_UOpsAbstractInterpContext 
*ctx, frame->sym_consts_len = (int)Py_SIZE(co_consts); frame->stack_len = stack_len; frame->locals_len = locals_len; - frame->prev = NULL; return frame; } @@ -423,7 +416,6 @@ ctx_frame_push( return -1; } - frame->prev = ctx->frame; ctx->frame = frame; @@ -436,13 +428,12 @@ ctx_frame_pop( ) { _Py_UOpsAbstractFrame *frame = ctx->frame; - ctx->frame = frame->prev; - assert(ctx->frame != NULL); - frame->prev = NULL; ctx->water_level = frame->locals; PyMem_Free(frame->sym_consts); ctx->curr_frame_depth--; + assert(ctx->curr_frame_depth >= 1); + ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; return 0; } From 01fb224a1ea07609b99734c7bc870a78ecc1481b Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 31 Jan 2024 12:23:46 +0800 Subject: [PATCH 086/111] update documentation --- Tools/cases_generator/interpreter_definition.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index e87aff43762b11..a68d42690791e2 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -156,12 +156,11 @@ and their refinements are below. 
They obey the following predicates: * `NULL_TYPE`: `val == NULL` * `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary` * `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)` -* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`: - `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)` * `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary` * `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type` * `PYFUNCTION_TYPE_VERSION_TYPE`: `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)` +* `SELF_OR_NULL`: `val == NULL || val != NULL` An `inst` without `stack_effect` is a transitional form to allow the original C code From 63f8abd4879eed59c4f1c4db1b8181a3694702d0 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 31 Jan 2024 23:54:11 +0800 Subject: [PATCH 087/111] remove peephole pass in happy case! --- Include/internal/pycore_uop_metadata.h | 2 +- Python/abstract_interp_cases.c.h | 4 - Python/optimizer_analysis.c | 120 +++++++++--------- .../cases_generator/tier2_abstract_common.py | 1 + 4 files changed, 64 insertions(+), 63 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 8dc4d10c819e2b..a847cca9e29ceb 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -165,7 +165,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_ATTR_METHOD_LAZY_DICT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG, - [_CHECK_PEP_523] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_CHECK_PEP_523] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | 
HAS_GUARD_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 2e599d2b9f61b4..a351dab60467bf 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1689,10 +1689,6 @@ break; } - case _CHECK_PEP_523: { - break; - } - case _CHECK_FUNCTION_EXACT_ARGS: { _Py_UOpsSymType *__self_or_null_; _Py_UOpsSymType *__callable_; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index ba2ad0921fecbd..a3c12481f16cac 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -652,14 +652,67 @@ emit_i(uops_emitter *emitter, return 0; } +static inline bool +op_is_zappable(int opcode) +{ + switch(opcode) { + case _SET_IP: + case _CHECK_VALIDITY: + case _LOAD_CONST_INLINE: + case _LOAD_CONST: + case _LOAD_FAST: + case _LOAD_CONST_INLINE_BORROW: + case _NOP: + return true; + default: + return false; + } +} + +static inline bool +op_is_load(int opcode) +{ + return (opcode == _LOAD_CONST_INLINE || + opcode == _LOAD_CONST || + opcode == LOAD_FAST || + opcode == _LOAD_CONST_INLINE_BORROW); +} + static inline int emit_const(uops_emitter *emitter, PyObject *const_val, int num_pops) { _PyUOpInstruction shrink_stack = {_SHRINK_STACK, num_pops, 0, 0}; - if (emit_i(emitter, shrink_stack) < 0) { - return -1; + // If all that precedes a _SHRINK_STACK is a bunch of loads, + // then we can safely eliminate that without side effects. + int load_count = 0; + _PyUOpInstruction *back = emitter->writebuffer + emitter->curr_i - 1; + while (back >= emitter->writebuffer && + load_count < num_pops && + op_is_zappable(back->opcode)) { + load_count += op_is_load(back->opcode); + back--; + } + if (load_count == num_pops) { + back = emitter->writebuffer + emitter->curr_i - 1; + load_count = 0; + // Back up over the previous loads and zap them. 
+ while(load_count < num_pops) { + load_count += op_is_load(back->opcode); + if (back->opcode == _LOAD_CONST_INLINE) { + PyObject *old_const_val = (PyObject *)back->operand; + Py_DECREF(old_const_val); + back->operand = (uintptr_t)NULL; + } + back->opcode = NOP; + back--; + } + } + else { + if (emit_i(emitter, shrink_stack) < 0) { + return -1; + } } int load_const_opcode = _Py_IsImmortal(const_val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; @@ -885,6 +938,13 @@ uop_abstract_interpret_single_inst( break; } + case _CHECK_PEP_523: + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == NULL) { + new_inst.opcode = _NOP; + } + break; case _SET_IP: case _CHECK_VALIDITY: case _SAVE_RETURN_OFFSET: @@ -1064,67 +1124,13 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } -static inline bool -op_is_zappable(int opcode) -{ - switch(opcode) { - case _SET_IP: - case _CHECK_VALIDITY: - case _LOAD_CONST_INLINE: - case _LOAD_CONST: - case _LOAD_FAST: - case _LOAD_CONST_INLINE_BORROW: - case _NOP: - return true; - default: - return false; - } -} - -static inline bool -op_is_load(int opcode) -{ - return (opcode == _LOAD_CONST_INLINE || - opcode == _LOAD_CONST || - opcode == LOAD_FAST || - opcode == _LOAD_CONST_INLINE_BORROW); -} static void peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) { for (int i = 0; i < buffer_size; i++) { _PyUOpInstruction *curr = buffer + i; - int oparg = curr->oparg; switch(curr->opcode) { - case _SHRINK_STACK: { - // If all that precedes a _SHRINK_STACK is a bunch of loads, - // then we can safely eliminate that without side effects. 
- int load_count = 0; - _PyUOpInstruction *back = curr-1; - while (back >= buffer && - load_count < oparg && - op_is_zappable(back->opcode)) { - load_count += op_is_load(back->opcode); - back--; - } - if (load_count == oparg) { - curr->opcode = NOP; - back = curr-1; - load_count = 0; - while(load_count < oparg) { - load_count += op_is_load(back->opcode); - if (back->opcode == _LOAD_CONST_INLINE) { - PyObject *const_val = (PyObject *)back->operand; - Py_DECREF(const_val); - back->operand = (uintptr_t)NULL; - } - back->opcode = NOP; - back--; - } - } - break; - } case _CHECK_PEP_523: /* Setting the eval frame function invalidates * all executors, so no need to check dynamically */ @@ -1172,8 +1178,6 @@ _Py_uop_analyze_and_optimize( goto error; } - infallible_optimizations(temp_writebuffer, new_trace_len); - // Fill in our new trace! memcpy(buffer, temp_writebuffer, new_trace_len * sizeof(_PyUOpInstruction)); diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py index 83baba1d198f84..b31089b0893de4 100644 --- a/Tools/cases_generator/tier2_abstract_common.py +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -17,4 +17,5 @@ "_SET_IP", "_CHECK_VALIDITY", "_SAVE_RETURN_OFFSET", + "_CHECK_PEP_523", } From 784d1712424ba5f4282532265ce7b3b6e6ebe871 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 1 Feb 2024 00:10:22 +0800 Subject: [PATCH 088/111] bail to tier 1 on failure, remove peepholer altogether --- Lib/test/test_capi/test_opt.py | 6 ++---- Python/optimizer.c | 4 ++++ Python/optimizer_analysis.c | 38 ++++++---------------------------- 3 files changed, 12 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 9ab2f64b061744..9aeb20ebc963d6 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -891,10 +891,8 @@ def dummy(x): testfunc(10) ex = get_first_executor(testfunc) - 
self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - self.assertIn("_PUSH_FRAME", uops) - self.assertNotIn("_CHECK_PEP_523", uops) + # Optimizer should have just bailed to tier 1. + self.assertIsNone(ex) diff --git a/Python/optimizer.c b/Python/optimizer.c index 9f565fe16b03fc..bdbce90ffca9b6 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -857,6 +857,10 @@ uop_optimize( if (err < 0) { return -1; } + if (err == 1) { + return 0; + } + assert(err == 0); _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a3c12481f16cac..0259d24c8a3939 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1124,34 +1124,9 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } - -static void -peephole_optimizations(_PyUOpInstruction *buffer, int buffer_size) -{ - for (int i = 0; i < buffer_size; i++) { - _PyUOpInstruction *curr = buffer + i; - switch(curr->opcode) { - case _CHECK_PEP_523: - /* Setting the eval frame function invalidates - * all executors, so no need to check dynamically */ - if (_PyInterpreterState_GET()->eval_frame == NULL) { - curr->opcode = _NOP; - } - break; - default: - break; - } - } -} - -static void -infallible_optimizations(_PyUOpInstruction *buffer, int buffer_size) -{ - peephole_optimizations(buffer, buffer_size); - remove_unneeded_uops(buffer, buffer_size); -} - - +// 0 - optimizer success +// -1 - failure, and raise error +// 1 - failure, no error raised, just fall back to Tier 1 int _Py_uop_analyze_and_optimize( PyCodeObject *co, @@ -1162,7 +1137,6 @@ _Py_uop_analyze_and_optimize( { OPT_STAT_INC(optimizer_attempts); _PyUOpInstruction *temp_writebuffer = NULL; - bool err_occurred = false; temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size); if (temp_writebuffer == NULL) { @@ -1178,6 +1152,8 @@ _Py_uop_analyze_and_optimize( goto error; } + 
remove_unneeded_uops(temp_writebuffer, new_trace_len); + // Fill in our new trace! memcpy(buffer, temp_writebuffer, new_trace_len * sizeof(_PyUOpInstruction)); @@ -1186,11 +1162,9 @@ _Py_uop_analyze_and_optimize( OPT_STAT_INC(optimizer_successes); return 0; error: - infallible_optimizations(buffer, buffer_size); // The only valid error we can raise is MemoryError. // Other times it's not really errors but things like not being able // to fetch a function version because the function got deleted. - err_occurred = PyErr_Occurred(); PyMem_Free(temp_writebuffer); - return err_occurred ? -1 : 0; + return PyErr_Occurred() ? -1 : 1; } From 79ba2bef55956ba2b52537098dcfd941235484e1 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 1 Feb 2024 02:19:39 +0800 Subject: [PATCH 089/111] change error codes --- Python/optimizer.c | 9 +++------ Python/optimizer_analysis.c | 8 ++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index bdbce90ffca9b6..3f7fcb0f851974 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -854,13 +854,10 @@ uop_optimize( OPT_STAT_INC(traces_created); // This clears its errors, so if it fails it just doesn't optimize. 
err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_WORKING_LENGTH, curr_stackentries); - if (err < 0) { - return -1; - } - if (err == 1) { - return 0; + if (err <= 0) { + return err; } - assert(err == 0); + assert(err == 1); _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0259d24c8a3939..3738d843916c91 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1124,9 +1124,9 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } -// 0 - optimizer success +// 0 - failure, no error raised, just fall back to Tier 1 // -1 - failure, and raise error -// 1 - failure, no error raised, just fall back to Tier 1 +// 1 - optimizer success int _Py_uop_analyze_and_optimize( PyCodeObject *co, @@ -1160,11 +1160,11 @@ _Py_uop_analyze_and_optimize( PyMem_Free(temp_writebuffer); OPT_STAT_INC(optimizer_successes); - return 0; + return 1; error: // The only valid error we can raise is MemoryError. // Other times it's not really errors but things like not being able // to fetch a function version because the function got deleted. PyMem_Free(temp_writebuffer); - return PyErr_Occurred() ? -1 : 1; + return PyErr_Occurred() ? 
-1 : 0; } From a0b58b12bae2efd68dab20a485be34d5aa347352 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 2 Feb 2024 23:19:39 +0800 Subject: [PATCH 090/111] Integrate the constant promoter --- Include/internal/pycore_optimizer.h | 21 ++++ Include/internal/pycore_uop_metadata.h | 12 +-- Lib/test/test_capi/test_opt.py | 45 ++++++++- Python/abstract_interp_cases.c.h | 18 ---- Python/optimizer.c | 14 +-- Python/optimizer_analysis.c | 98 +++++++++++++++---- .../cases_generator/tier2_abstract_common.py | 7 ++ 7 files changed, 157 insertions(+), 58 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index ffbaea80eadfb7..d6ecd3598f9f03 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,12 +8,33 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_uop_ids.h" + #define TRACE_STACK_SIZE 5 int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); + +static void +clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) +{ + for (Py_ssize_t i = 0; i < uop_len; i++) { + if (trace[i].opcode == _LOAD_CONST_INLINE || + trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { + PyObject *c = (PyObject*)trace[i].operand; + Py_CLEAR(c); + } + if (trace[i].opcode == _JUMP_ABSOLUTE || + trace[i].opcode == _JUMP_TO_TOP || + trace[i].opcode == _EXIT_TRACE) { + return; + } + } +} + + extern PyTypeObject _PyCounterExecutor_Type; extern PyTypeObject _PyCounterOptimizer_Type; extern PyTypeObject _PyDefaultOptimizer_Type; diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5916db2e01c98c..9e2b46054689b5 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -204,12 +204,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] 
= { [_JUMP_ABSOLUTE] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG, [_JUMP_ABSOLUTE_HEADER] = 0, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, - [_LOAD_CONST_INLINE] = 0, - [_LOAD_CONST_INLINE_BORROW] = 0, - [_LOAD_CONST_INLINE_WITH_NULL] = 0, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0, - [_CHECK_GLOBALS] = HAS_DEOPT_FLAG, - [_CHECK_BUILTINS] = HAS_DEOPT_FLAG, + [_LOAD_CONST_INLINE] = HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_WITH_NULL] = HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_SPECIAL_OPT_FLAG, + [_CHECK_GLOBALS] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, + [_CHECK_BUILTINS] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_SHRINK_STACK] = HAS_ARG_FLAG, }; diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 9aeb20ebc963d6..f448802aabcad2 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -891,9 +891,50 @@ def dummy(x): testfunc(10) ex = get_first_executor(testfunc) - # Optimizer should have just bailed to tier 1. - self.assertIsNone(ex) + self.assertIsNotNone(ex) + + def test_call_py_exact_args_disappearing(self): + def dummy(x): + return x+1 + + def testfunc(n): + for i in range(n): + dummy(i) + opt = _testinternalcapi.get_uop_optimizer() + # Trigger specialization + testfunc(8) + with temporary_optimizer(opt): + del dummy + gc.collect() + + def dummy(x): + return x + 2 + testfunc(10) + + # As long as it doesn't crash it's fine. + # Whether we get an executor or not is non-deterministic, + # because it's decided by when the function is freed. + + def test_promote_globals_to_constants(self): + def dummy(x): + return x+1 + + def testfunc(n): + for i in range(n): + x = range(i) + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + # Bail to tier 1. 
+ self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops) if __name__ == "__main__": diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a351dab60467bf..e737c0c91ff97a 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1984,24 +1984,6 @@ break; } - case _LOAD_CONST_INLINE: { - _Py_UOpsSymType *__value_; - __value_ = sym_init_unknown(ctx); - if(__value_ == NULL) goto error; - stack_pointer[0] = __value_; - stack_pointer += 1; - break; - } - - case _LOAD_CONST_INLINE_BORROW: { - _Py_UOpsSymType *__value_; - __value_ = sym_init_unknown(ctx); - if(__value_ == NULL) goto error; - stack_pointer[0] = __value_; - stack_pointer += 1; - break; - } - case _INTERNAL_INCREMENT_OPT_COUNTER: { stack_pointer += -1; break; diff --git a/Python/optimizer.c b/Python/optimizer.c index 835f47bfe18046..6029195d212cec 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -226,22 +226,10 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// -static void -clear_strong_refs_in_uops(_PyExecutorObject *self) -{ - Py_ssize_t uop_len = Py_SIZE(self); - _PyUOpInstruction *trace = &self->trace[0]; - for (Py_ssize_t i = 0; i < uop_len; i++) { - if (trace[i].opcode == _LOAD_CONST_INLINE) { - PyObject *c = (PyObject*)trace[i].operand; - Py_CLEAR(c); - } - } -} static void uop_dealloc(_PyExecutorObject *self) { - clear_strong_refs_in_uops(self); + clear_strong_refs_in_uops(&self->trace[0], Py_SIZE(self)); _Py_ExecutorClear(self); #ifdef _Py_JIT _PyJIT_Free(self); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index df62df7fdd1c77..2d835e3fac3534 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -268,7 +268,7 @@ abstractinterp_context_new(PyCodeObject *co, } static inline _Py_UOpsSymType* 
-sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx); +sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val); static inline _Py_UOpsSymType ** create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) @@ -279,7 +279,7 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) return NULL; } for (Py_ssize_t i = 0; i < co_const_len; i++) { - _Py_UOpsSymType *res = sym_init_const(ctx, PyTuple_GET_ITEM(co_consts, i), (int)i); + _Py_UOpsSymType *res = sym_init_const(ctx, PyTuple_GET_ITEM(co_consts, i)); if (res == NULL) { goto error; } @@ -513,7 +513,7 @@ sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx) // Takes a borrowed reference to const_val. static inline _Py_UOpsSymType* -sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val, int const_idx) +sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val) { assert(const_val != NULL); _Py_UOpsSymType *temp = _Py_UOpsSymType_New( @@ -662,9 +662,11 @@ op_is_zappable(int opcode) case _SET_IP: case _CHECK_VALIDITY: case _LOAD_CONST_INLINE: + case _LOAD_CONST_INLINE_BORROW: + case _LOAD_CONST_INLINE_WITH_NULL: + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: case _LOAD_CONST: case _LOAD_FAST: - case _LOAD_CONST_INLINE_BORROW: case _NOP: return true; default: @@ -673,12 +675,20 @@ op_is_zappable(int opcode) } static inline bool -op_is_load(int opcode) +op_count_loads(int opcode) { - return (opcode == _LOAD_CONST_INLINE || - opcode == _LOAD_CONST || - opcode == LOAD_FAST || - opcode == _LOAD_CONST_INLINE_BORROW); + switch(opcode) { + case _LOAD_CONST_INLINE: + case _LOAD_CONST: + case _LOAD_FAST: + case _LOAD_CONST_INLINE_BORROW: + return 1; + case _LOAD_CONST_INLINE_WITH_NULL: + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: + return 2; + default: + return 0; + } } static inline int @@ -694,7 +704,7 @@ emit_const(uops_emitter *emitter, while (back >= emitter->writebuffer && load_count < num_pops && 
op_is_zappable(back->opcode)) { - load_count += op_is_load(back->opcode); + load_count += op_count_loads(back->opcode); back--; } if (load_count == num_pops) { @@ -702,8 +712,9 @@ emit_const(uops_emitter *emitter, load_count = 0; // Back up over the previous loads and zap them. while(load_count < num_pops) { - load_count += op_is_load(back->opcode); - if (back->opcode == _LOAD_CONST_INLINE) { + load_count += op_count_loads(back->opcode); + if (back->opcode == _LOAD_CONST_INLINE || + back->opcode == _LOAD_CONST_INLINE_WITH_NULL) { PyObject *old_const_val = (PyObject *)back->operand; Py_DECREF(old_const_val); back->operand = (uintptr_t)NULL; @@ -850,6 +861,46 @@ uop_abstract_interpret_single_inst( new_inst.operand = (uintptr_t)val; break; } + case _LOAD_CONST_INLINE: + case _LOAD_CONST_INLINE_BORROW: + { + _Py_UOpsSymType *sym_const = sym_init_const(ctx, (PyObject *)inst->operand); + if (sym_const == NULL) { + goto error; + } + // We need to incref it for it to safely decref in the + // executor finalizer. + if (opcode == _LOAD_CONST_INLINE) { + Py_INCREF(inst->operand); + } + STACK_GROW(1); + PEEK(1) = sym_const; + assert(is_const(PEEK(1))); + break; + } + case _LOAD_CONST_INLINE_WITH_NULL: + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: + { + _Py_UOpsSymType *sym_const = sym_init_const(ctx, (PyObject *)inst->operand); + if (sym_const == NULL) { + goto error; + } + // We need to incref it for it to safely decref in the + // executor finalizer. 
+ if (opcode == _LOAD_CONST_INLINE_WITH_NULL) { + Py_INCREF(inst->operand); + } + STACK_GROW(1); + PEEK(1) = sym_const; + assert(is_const(PEEK(1))); + _Py_UOpsSymType *null_sym = sym_init_push_null(ctx); + if (null_sym == NULL) { + goto error; + } + STACK_GROW(1); + PEEK(1) = null_sym; + break; + } case STORE_FAST_MAYBE_NULL: case STORE_FAST: { _Py_UOpsSymType *value = PEEK(1); @@ -948,6 +999,8 @@ uop_abstract_interpret_single_inst( new_inst.opcode = _NOP; } break; + case _CHECK_GLOBALS: + case _CHECK_BUILTINS: case _SET_IP: case _CHECK_VALIDITY: case _SAVE_RETURN_OFFSET: @@ -1029,6 +1082,7 @@ global_to_const(_PyUOpInstruction *inst, PyObject *obj) } else { inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE; + Py_INCREF(res); } inst->operand = (uint64_t)res; } @@ -1170,9 +1224,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins = func->func_builtins; break; } - case _JUMP_TO_TOP: - case _EXIT_TRACE: - return 1; + default: + if (op_is_end(opcode)) { + return 1; + } + break; } } return 0; @@ -1242,7 +1298,7 @@ uop_abstract_interpret( assert(op_is_end(curr->opcode)); // If we end in a loop, and we have a lot of space left, peel the loop for - // poor man's loop invariant code motino for guards + // poor man's loop invariant code motion for guards // https://en.wikipedia.org/wiki/Loop_splitting has_enough_space_to_duplicate_loop = ((ctx->emitter.curr_i * 3) < (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer)); @@ -1308,9 +1364,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = NOP; } } - else if (opcode == _JUMP_TO_TOP || - opcode == _EXIT_TRACE || - opcode == _JUMP_ABSOLUTE) { + else if (op_is_end(opcode)) { break; } else { @@ -1329,6 +1383,10 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } + +// 0 - failure, no error raised, just fall back to Tier 1 +// -1 - failure, and raise error +// 1 - optimizer success int 
_Py_uop_analyze_and_optimize( _PyInterpreterFrame *frame, @@ -1360,6 +1418,8 @@ _Py_uop_analyze_and_optimize( goto error; } + clear_strong_refs_in_uops(buffer, buffer_size); + remove_unneeded_uops(temp_writebuffer, new_trace_len); // Fill in our new trace! diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py index b31089b0893de4..2a54c8170824bb 100644 --- a/Tools/cases_generator/tier2_abstract_common.py +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -18,4 +18,11 @@ "_CHECK_VALIDITY", "_SAVE_RETURN_OFFSET", "_CHECK_PEP_523", + "_CHECK_GLOBALS", + "_CHECK_BUILTINS", + # Custom tier 2 things + "_LOAD_CONST_INLINE", + "_LOAD_CONST_INLINE_WITH_NULL", + "_LOAD_CONST_INLINE_BORROW", + "_LOAD_CONST_INLINE_BORROW_WITH_NULL", } From 4573fca010b9865c52405e5d253dd9e1940ae996 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 2 Feb 2024 23:22:23 +0800 Subject: [PATCH 091/111] fix test --- Lib/test/test_capi/test_opt.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f448802aabcad2..c9a5c1c5aa7378 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -891,30 +891,11 @@ def dummy(x): testfunc(10) ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - - def test_call_py_exact_args_disappearing(self): - def dummy(x): - return x+1 - - def testfunc(n): - for i in range(n): - dummy(i) - - opt = _testinternalcapi.get_uop_optimizer() - # Trigger specialization - testfunc(8) - with temporary_optimizer(opt): - del dummy - gc.collect() - - def dummy(x): - return x + 2 - testfunc(10) - - # As long as it doesn't crash it's fine. + # Honestly as long as it doesn't crash it's fine. # Whether we get an executor or not is non-deterministic, # because it's decided by when the function is freed. 
+ # This test is a little implementation specific. + self.assertIsNone(ex) def test_promote_globals_to_constants(self): def dummy(x): From 2096a8acfb074d6162c79697862ccd8a1cf46bd3 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Feb 2024 00:32:37 +0800 Subject: [PATCH 092/111] try fix memleak? --- Include/internal/pycore_optimizer.h | 16 ---------------- Python/optimizer.c | 12 ++++++++++++ Python/optimizer_analysis.c | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index d6ecd3598f9f03..ceaf419081f22d 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -17,22 +17,6 @@ int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, _PyBloomFilter *dependencies); -static void -clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) -{ - for (Py_ssize_t i = 0; i < uop_len; i++) { - if (trace[i].opcode == _LOAD_CONST_INLINE || - trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { - PyObject *c = (PyObject*)trace[i].operand; - Py_CLEAR(c); - } - if (trace[i].opcode == _JUMP_ABSOLUTE || - trace[i].opcode == _JUMP_TO_TOP || - trace[i].opcode == _EXIT_TRACE) { - return; - } - } -} extern PyTypeObject _PyCounterExecutor_Type; diff --git a/Python/optimizer.c b/Python/optimizer.c index 6029195d212cec..fb9da8720d5823 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -226,6 +226,18 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// +static void +clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) +{ + for (Py_ssize_t i = 0; i < uop_len; i++) { + if (trace[i].opcode == _LOAD_CONST_INLINE || + trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { + PyObject *c = (PyObject*)trace[i].operand; + Py_CLEAR(c); + } + } +} + static void uop_dealloc(_PyExecutorObject *self) { diff 
--git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2d835e3fac3534..a21259d3918f12 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1383,6 +1383,22 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } +static void +clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) +{ + for (Py_ssize_t i = 0; i < uop_len; i++) { + if (trace[i].opcode == _LOAD_CONST_INLINE || + trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { + PyObject *c = (PyObject*)trace[i].operand; + Py_CLEAR(c); + } + if (trace[i].opcode == _JUMP_ABSOLUTE || + trace[i].opcode == _JUMP_TO_TOP || + trace[i].opcode == _EXIT_TRACE) { + return; + } + } +} // 0 - failure, no error raised, just fall back to Tier 1 // -1 - failure, and raise error From 4ad28042e8a979b857b1a0fcc672e999c222a908 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Feb 2024 00:43:14 +0800 Subject: [PATCH 093/111] Revert "try fix memleak?" This reverts commit 2096a8acfb074d6162c79697862ccd8a1cf46bd3. 
--- Include/internal/pycore_optimizer.h | 16 ++++++++++++++++ Python/optimizer.c | 12 ------------ Python/optimizer_analysis.c | 16 ---------------- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index ceaf419081f22d..d6ecd3598f9f03 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -17,6 +17,22 @@ int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, _PyBloomFilter *dependencies); +static void +clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) +{ + for (Py_ssize_t i = 0; i < uop_len; i++) { + if (trace[i].opcode == _LOAD_CONST_INLINE || + trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { + PyObject *c = (PyObject*)trace[i].operand; + Py_CLEAR(c); + } + if (trace[i].opcode == _JUMP_ABSOLUTE || + trace[i].opcode == _JUMP_TO_TOP || + trace[i].opcode == _EXIT_TRACE) { + return; + } + } +} extern PyTypeObject _PyCounterExecutor_Type; diff --git a/Python/optimizer.c b/Python/optimizer.c index fb9da8720d5823..6029195d212cec 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -226,18 +226,6 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// -static void -clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) -{ - for (Py_ssize_t i = 0; i < uop_len; i++) { - if (trace[i].opcode == _LOAD_CONST_INLINE || - trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { - PyObject *c = (PyObject*)trace[i].operand; - Py_CLEAR(c); - } - } -} - static void uop_dealloc(_PyExecutorObject *self) { diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a21259d3918f12..2d835e3fac3534 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1383,22 +1383,6 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } -static void -clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t 
uop_len) -{ - for (Py_ssize_t i = 0; i < uop_len; i++) { - if (trace[i].opcode == _LOAD_CONST_INLINE || - trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { - PyObject *c = (PyObject*)trace[i].operand; - Py_CLEAR(c); - } - if (trace[i].opcode == _JUMP_ABSOLUTE || - trace[i].opcode == _JUMP_TO_TOP || - trace[i].opcode == _EXIT_TRACE) { - return; - } - } -} // 0 - failure, no error raised, just fall back to Tier 1 // -1 - failure, and raise error From 224f6b44459eec5ec67e0f7650849df0afccf12e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Feb 2024 00:51:47 +0800 Subject: [PATCH 094/111] fix memleak for real --- Include/internal/pycore_optimizer.h | 22 +++++----------------- Lib/test/test_capi/test_opt.py | 1 - Python/optimizer.c | 22 +++++++++++++++++----- Python/optimizer_analysis.c | 19 +++++++------------ 4 files changed, 29 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index d6ecd3598f9f03..a8ee8f29a73b22 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,6 +10,11 @@ extern "C" { #include "pycore_uop_ids.h" +// This is the length of the trace we project initially. +#define UOP_MAX_TRACE_LENGTH 512 +// This the above + additional working space we need. 
+#define UOP_MAX_TRACE_WORKING_LENGTH (UOP_MAX_TRACE_LENGTH * 2) + #define TRACE_STACK_SIZE 5 int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, @@ -17,23 +22,6 @@ int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, _PyBloomFilter *dependencies); -static void -clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) -{ - for (Py_ssize_t i = 0; i < uop_len; i++) { - if (trace[i].opcode == _LOAD_CONST_INLINE || - trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { - PyObject *c = (PyObject*)trace[i].operand; - Py_CLEAR(c); - } - if (trace[i].opcode == _JUMP_ABSOLUTE || - trace[i].opcode == _JUMP_TO_TOP || - trace[i].opcode == _EXIT_TRACE) { - return; - } - } -} - extern PyTypeObject _PyCounterExecutor_Type; extern PyTypeObject _PyCounterOptimizer_Type; diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index c9a5c1c5aa7378..8aca8deadb9d54 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -895,7 +895,6 @@ def dummy(x): # Whether we get an executor or not is non-deterministic, # because it's decided by when the function is freed. # This test is a little implementation specific. - self.assertIsNone(ex) def test_promote_globals_to_constants(self): def dummy(x): diff --git a/Python/optimizer.c b/Python/optimizer.c index 6029195d212cec..d94f29c11da829 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -17,11 +17,6 @@ #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA -// This is the length of the trace we project initially. -#define UOP_MAX_TRACE_LENGTH 512 -// This the above + additional working space we need. 
-#define UOP_MAX_TRACE_WORKING_LENGTH (UOP_MAX_TRACE_LENGTH * 2) - #define MAX_EXECUTORS_SIZE 256 @@ -227,6 +222,23 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// +static void +clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) +{ + for (Py_ssize_t i = 0; i < uop_len; i++) { + if (trace[i].opcode == _LOAD_CONST_INLINE || + trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { + PyObject *c = (PyObject*)trace[i].operand; + Py_CLEAR(c); + } + if (trace[i].opcode == _JUMP_ABSOLUTE || + trace[i].opcode == _JUMP_TO_TOP || + trace[i].opcode == _EXIT_TRACE) { + return; + } + } +} + static void uop_dealloc(_PyExecutorObject *self) { clear_strong_refs_in_uops(&self->trace[0], Py_SIZE(self)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2d835e3fac3534..0675a89717d1af 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1082,7 +1082,6 @@ global_to_const(_PyUOpInstruction *inst, PyObject *obj) } else { inst->opcode = (inst->oparg & 1) ? 
_LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE; - Py_INCREF(res); } inst->operand = (uint64_t)res; } @@ -1397,42 +1396,38 @@ _Py_uop_analyze_and_optimize( ) { OPT_STAT_INC(optimizer_attempts); - _PyUOpInstruction *temp_writebuffer = NULL; + _PyUOpInstruction temp_writebuffer[UOP_MAX_TRACE_WORKING_LENGTH]; + _PyUOpInstruction temp_readbuffer[UOP_MAX_TRACE_WORKING_LENGTH]; - temp_writebuffer = PyMem_New(_PyUOpInstruction, buffer_size); - if (temp_writebuffer == NULL) { - goto error; - } + memcpy(temp_readbuffer, buffer, buffer_size * sizeof(_PyUOpInstruction)); - int err = remove_globals(frame, buffer, buffer_size, dependencies); + int err = remove_globals(frame, temp_readbuffer, buffer_size, dependencies); if (err <= 0) { - return err; + goto error; } // Pass: Abstract interpretation and symbolic analysis int new_trace_len = uop_abstract_interpret( - (PyCodeObject *)frame->f_executable, buffer, temp_writebuffer, + (PyCodeObject *)frame->f_executable, temp_readbuffer, temp_writebuffer, buffer_size, curr_stacklen); if (new_trace_len < 0) { goto error; } - clear_strong_refs_in_uops(buffer, buffer_size); remove_unneeded_uops(temp_writebuffer, new_trace_len); // Fill in our new trace! memcpy(buffer, temp_writebuffer, new_trace_len * sizeof(_PyUOpInstruction)); - PyMem_Free(temp_writebuffer); OPT_STAT_INC(optimizer_successes); return 1; error: + // The only valid error we can raise is MemoryError. // Other times it's not really errors but things like not being able // to fetch a function version because the function got deleted. - PyMem_Free(temp_writebuffer); return PyErr_Occurred() ? 
-1 : 0; } From b784617cd170cee748af63c1294b9431eab91452 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Feb 2024 02:20:59 +0800 Subject: [PATCH 095/111] add more constant prop tests for promoted globals --- Lib/test/test_capi/test_opt.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 8aca8deadb9d54..490ad236d581b8 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -897,9 +897,6 @@ def dummy(x): # This test is a little implementation specific. def test_promote_globals_to_constants(self): - def dummy(x): - return x+1 - def testfunc(n): for i in range(n): x = range(i) @@ -910,12 +907,32 @@ def testfunc(n): testfunc(20) ex = get_first_executor(testfunc) - # Bail to tier 1. self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops) self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops) + def test_promote_globals_to_constants_propagate(self): + def testfunc(n): + for i in range(n): + x = Foo.attr + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + self.assertEqual(res, Foo.attr) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_CHECK_ATTR_CLASS", uops) + self.assertIn("_LOAD_ATTR_CLASS", uops) + + +class Foo: + attr = 1 + if __name__ == "__main__": unittest.main() From 2983553fea8ca56027b01d15cc5ecd8d2ebfc3f3 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 3 Feb 2024 04:11:55 +0800 Subject: [PATCH 096/111] refactor --- Include/internal/pycore_uop_metadata.h | 4 ++-- Tools/cases_generator/analyzer.py | 6 +----- Tools/cases_generator/tier2_abstract_common.py | 6 ++++++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git 
a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 9e2b46054689b5..40a1e12b515e33 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -208,8 +208,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE_BORROW] = HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE_WITH_NULL] = HAS_SPECIAL_OPT_FLAG, [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_SPECIAL_OPT_FLAG, - [_CHECK_GLOBALS] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, - [_CHECK_BUILTINS] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, + [_CHECK_GLOBALS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, + [_CHECK_BUILTINS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_SHRINK_STACK] = HAS_ARG_FLAG, }; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 8f0d8209786ab5..7faa415232cdef 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -3,11 +3,7 @@ import parser from typing import Optional -from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR - -SPECIAL_GUARDS = { - "_CHECK_PEP_523", -} +from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR, SPECIAL_GUARDS @dataclass class Properties: diff --git a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py index 2a54c8170824bb..92e896565c43d2 100644 --- a/Tools/cases_generator/tier2_abstract_common.py +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -26,3 +26,9 @@ "_LOAD_CONST_INLINE_BORROW", "_LOAD_CONST_INLINE_BORROW_WITH_NULL", } + +SPECIAL_GUARDS = { + "_CHECK_PEP_523", + "_CHECK_GLOBALS", + "_CHECK_BUILTINS", +} From 6a6b11f66ec20fc499b6d6913de9a3c376ad40e0 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 16:03:06 +0800 Subject: [PATCH 097/111] Simplify codegen -- either types or consts, not both --- 
Python/abstract_interp_cases.c.h | 120 ++--------------- Python/optimizer_analysis.c | 14 ++ .../tier2_abstract_generator.py | 127 +++++++++--------- 3 files changed, 87 insertions(+), 174 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index e737c0c91ff97a..6d7b93e3c85585 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -59,7 +59,7 @@ res = Py_IsFalse(value) ? Py_True : Py_False; __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 1) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 1) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -148,19 +148,6 @@ _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; - // Constant evaluation - if (is_const(__left_) && is_const(__right_)) { - PyObject *right; - PyObject *left; - left = get_const(__left_); - right = get_const(__right_); - if (!PyLong_CheckExact(left)) goto error; - if (!PyLong_CheckExact(right)) goto error; - - DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYLONG_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -194,7 +181,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -228,7 +215,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -262,7 
+249,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -282,19 +269,6 @@ _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; - // Constant evaluation - if (is_const(__left_) && is_const(__right_)) { - PyObject *right; - PyObject *left; - left = get_const(__left_); - right = get_const(__right_); - if (!PyFloat_CheckExact(left)) goto error; - if (!PyFloat_CheckExact(right)) goto error; - - DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYFLOAT_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -329,7 +303,7 @@ DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -364,7 +338,7 @@ DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -399,7 +373,7 @@ DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -419,19 +393,6 
@@ _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; - // Constant evaluation - if (is_const(__left_) && is_const(__right_)) { - PyObject *right; - PyObject *left; - left = get_const(__left_); - right = get_const(__right_); - if (!PyUnicode_CheckExact(left)) goto error; - if (!PyUnicode_CheckExact(right)) goto error; - - DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYUNICODE_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -465,7 +426,7 @@ __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); if(__res_ == NULL) { goto error; } - if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } new_inst.opcode = _NOP; } else { @@ -962,18 +923,6 @@ _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation - if (is_const(__owner_)) { - PyObject *owner; - owner = get_const(__owner_); - PyTypeObject *tp = Py_TYPE(owner); - assert(type_version != 0); - if (tp->tp_version_tag != type_version) goto error; - - DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -1127,18 +1076,6 @@ case _GUARD_DORV_VALUES: { _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; - // Constant evaluation - if (is_const(__owner_)) { - PyObject *owner; - owner = get_const(__owner_); - assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - if (!_PyDictOrValues_IsValues(dorv)) goto error; - - DPRINTF(3, "const eliminated 
guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -1539,18 +1476,6 @@ _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation - if (is_const(__owner_)) { - PyObject *owner; - owner = get_const(__owner_); - PyTypeObject *owner_cls = Py_TYPE(owner); - PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; - if (owner_heap_type->ht_cached_keys->dk_version != keys_version) goto error; - - DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -1650,19 +1575,6 @@ _Py_UOpsSymType *__callable_; __null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; - // Constant evaluation - if (is_const(__callable_) && is_const(__null_)) { - PyObject *null; - PyObject *callable; - callable = get_const(__callable_); - null = get_const(__null_); - if (null != NULL) goto error; - if (Py_TYPE(callable) != &PyMethod_Type) goto error; - - DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__null_, NULL_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -1695,22 +1607,6 @@ __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)CURRENT_OPERAND(); - // Constant evaluation - if (is_const(__callable_) && is_const(__self_or_null_)) { - PyObject *self_or_null; - PyObject *callable; - callable = get_const(__callable_); - self_or_null = 
get_const(__self_or_null_); - if (!PyFunction_Check(callable)) goto error; - PyFunctionObject *func = (PyFunctionObject *)callable; - if (func->func_version != func_version) goto error; - PyCodeObject *code = (PyCodeObject *)func->func_code; - if (code->co_argcount != oparg + (self_or_null != NULL)) goto error; - - DPRINTF(3, "const eliminated guard\n"); - new_inst.opcode = _NOP; - break; - } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)) { DPRINTF(2, "type propagation eliminated guard\n"); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0675a89717d1af..7402524833a2ea 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -492,6 +492,15 @@ sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) { PyTypeObject *tp = Py_TYPE(obj); + if (tp->tp_version_tag != 0) { + sym_set_type(sym, GUARD_TYPE_VERSION_TYPE, tp->tp_version_tag); + } + if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) { + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(obj); + if(_PyDictOrValues_IsValues(dorv)) { + sym_set_type(sym, GUARD_DORV_VALUES_TYPE, 0); + } + } if (tp == &PyLong_Type) { sym_set_type(sym, PYLONG_TYPE, 0); } @@ -501,6 +510,11 @@ sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) else if (tp == &PyUnicode_Type) { sym_set_type(sym, PYUNICODE_TYPE, 0); } + else if (tp == &PyFunction_Type) { + sym_set_type(sym, PYFUNCTION_TYPE_VERSION_TYPE, + ((PyFunctionObject *)(obj))->func_version); + } + } diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 669d6aa882a168..9c0ccc1479298d 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -242,7 +242,7 @@ def _write_body_abstract_interp_pure_uop( maybe_const_val = new_sym(const_val) out.emit(f"{mangled_uop.stack.outputs[0].name} = {maybe_const_val}\n") 
out.emit(f"if({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") - out.emit(f" if (emit_const(&ctx->emitter, {const_val}, " + out.emit(f"if (emit_const(&ctx->emitter, {const_val}, " f"{len(uop.stack.inputs)}) < 0) {{ goto error; }}\n") out.emit("new_inst.opcode = _NOP;\n") out.emit("}\n") @@ -282,71 +282,74 @@ def _write_body_abstract_interp_guard_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") - out.emit("// Constant evaluation\n") - predicates_str = " && ".join( - [ - f"is_const({var.name})" - for var in mangled_uop.stack.inputs - if var.name not in UNUSED - ] - ) - if predicates_str: - out.emit(f"if ({predicates_str}) {{\n") - declare_variables(uop, out, default_type="PyObject *") - for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): - if var.name in UNUSED: + # Does any of the output specify types? If so, we can eliminate the guard based on types. + can_type_eliminate = any(output_var.type_prop for output_var in mangled_uop.stack.outputs) + + # Cannot type eliminate -- try constant evaluation instead. 
+ # We don't need to try + if not can_type_eliminate: + out.emit("// Constant evaluation\n") + predicates_str = " && ".join( + [ + f"is_const({var.name})" + for var in mangled_uop.stack.inputs + if var.name not in UNUSED + ] + ) + if predicates_str: + out.emit(f"if ({predicates_str}) {{\n") + declare_variables(uop, out, default_type="PyObject *") + for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): + if var.name in UNUSED: + continue + out.emit(f"{var.name} = get_const({mangled_var.name});\n") + emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) + out.emit("\n") + # Guard elimination + out.emit('DPRINTF(3, "const eliminated guard\\n");\n') + out.emit("new_inst.opcode = _NOP;\n") + out.emit("break;\n") + out.emit("}\n") + else: + # If the input types already match, eliminate the guard + # Read the cache information to check the auxiliary type information + predicates = [] + propagates = [] + + assert len(mangled_uop.stack.outputs) == len( + mangled_uop.stack.inputs + ), "guards must have same number of args" + assert [ + output == input_ + for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs) + ], "guards must forward their stack values" + for output_var in mangled_uop.stack.outputs: + if output_var.name in UNUSED: continue - out.emit(f"{var.name} = get_const({mangled_var.name});\n") - emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) - out.emit("\n") - # Guard elimination - out.emit('DPRINTF(3, "const eliminated guard\\n");\n') + if (typ := output_var.type_prop) is not None: + typname, aux = typ + aux = "0" if aux is None else aux + # Check that the input type information match (including auxiliary info) + predicates.append( + f"sym_matches_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" + ) + # Propagate mode - set the types + propagates.append( + f"sym_set_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" + ) + + out.emit("// Type guard 
elimination\n") + out.emit(f"if ({' && '.join(predicates)}) {{\n") + out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') out.emit("new_inst.opcode = _NOP;\n") out.emit("break;\n") out.emit("}\n") - - # Does the input specify typed inputs? - if not any(output_var.type_prop for output_var in mangled_uop.stack.outputs): - return - # If the input types already match, eliminate the guard - # Read the cache information to check the auxiliary type information - predicates = [] - propagates = [] - - assert len(mangled_uop.stack.outputs) == len( - mangled_uop.stack.inputs - ), "guards must have same number of args" - assert [ - output == input_ - for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs) - ], "guards must forward their stack values" - for output_var in mangled_uop.stack.outputs: - if output_var.name in UNUSED: - continue - if (typ := output_var.type_prop) is not None: - typname, aux = typ - aux = "0" if aux is None else aux - # Check that the input type information match (including auxiliary info) - predicates.append( - f"sym_matches_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" - ) - # Propagate mode - set the types - propagates.append( - f"sym_set_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" - ) - - out.emit("// Type guard elimination\n") - out.emit(f"if ({' && '.join(predicates)}) {{\n") - out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') - out.emit("new_inst.opcode = _NOP;\n") - out.emit("break;\n") - out.emit("}\n") - # Else we need the guard - out.emit("else {\n") - out.emit("// Type propagation\n") - for prop in propagates: - out.emit(f"{prop};\n") - out.emit("}\n") + # Else we need the guard + out.emit("else {\n") + out.emit("// Type propagation\n") + for prop in propagates: + out.emit(f"{prop};\n") + out.emit("}\n") def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) -> None: From cfe1de02ffb8fbf1fcd2f06cc86e4505f6a77579 
Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 16:27:20 +0800 Subject: [PATCH 098/111] cleanup, reduce memory usage even more --- Python/optimizer_analysis.c | 54 +++++++++---------------------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7402524833a2ea..018d312c4cf706 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -37,6 +37,8 @@ #define OVERALLOCATE_FACTOR 5 +#define ARENA_SIZE (UOP_MAX_TRACE_WORKING_LENGTH * OVERALLOCATE_FACTOR) + // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) @@ -121,7 +123,7 @@ typedef struct _Py_UOpsAbstractFrame { typedef struct ty_arena { int ty_curr_number; int ty_max_number; - _Py_UOpsSymType *arena; + _Py_UOpsSymType arena[ARENA_SIZE]; } ty_arena; typedef struct frequent_syms { @@ -166,13 +168,10 @@ abstractinterp_dealloc(_Py_UOpsAbstractInterpContext *self) curr--; } self->curr_frame_depth = 0; - if (self->t_arena.arena != NULL) { - int tys = self->t_arena.ty_curr_number; - for (int i = 0; i < tys; i++) { - Py_CLEAR(self->t_arena.arena[i].const_val); - } + int tys = self->t_arena.ty_curr_number; + for (int i = 0; i < tys; i++) { + Py_CLEAR(self->t_arena.arena[i].const_val); } - PyMem_Free(self->t_arena.arena); PyMem_Free(self); } @@ -203,13 +202,7 @@ abstractinterp_context_new(PyCodeObject *co, int stack_len = co->co_stacksize; _Py_UOpsAbstractFrame *frame = NULL; _Py_UOpsAbstractInterpContext *self = NULL; - _Py_UOpsSymType *t_arena = NULL; - int ty_arena_size = ir_entries * OVERALLOCATE_FACTOR; - t_arena = (_Py_UOpsSymType *)PyMem_New(_Py_UOpsSymType, ty_arena_size); - if (t_arena == NULL) { - goto error; - } self = PyMem_New(_Py_UOpsAbstractInterpContext, 1); if (self == NULL) { @@ -225,8 +218,7 @@ abstractinterp_context_new(PyCodeObject *co, // Setup the arena for 
sym expressions. self->t_arena.ty_curr_number = 0; - self->t_arena.arena = t_arena; - self->t_arena.ty_max_number = ty_arena_size; + self->t_arena.ty_max_number = ARENA_SIZE; // Frame setup @@ -256,10 +248,8 @@ abstractinterp_context_new(PyCodeObject *co, return self; error: - PyMem_Free(t_arena); if (self != NULL) { // Important so we don't double free them. - self->t_arena.arena = NULL; self->frame = NULL; } abstractinterp_dealloc(self); @@ -477,7 +467,6 @@ sym_set_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinem } } -// Note: for this, to_sym MUST point to brand new sym. static void sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym) { @@ -593,17 +582,6 @@ op_is_end(uint32_t opcode) opcode == _JUMP_ABSOLUTE; } -static inline bool -op_is_guard(uint32_t opcode) -{ - return _PyUop_Flags[opcode] & HAS_GUARD_FLAG; -} - -static inline bool -op_is_pure(uint32_t opcode) -{ - return _PyUop_Flags[opcode] & HAS_PURE_FLAG; -} static inline bool op_is_bookkeeping(uint32_t opcode) { @@ -613,11 +591,6 @@ op_is_bookkeeping(uint32_t opcode) { opcode == _RESUME_CHECK); } -static inline bool -op_is_specially_handled(uint32_t opcode) -{ - return _PyUop_Flags[opcode] & HAS_SPECIAL_OPT_FLAG; -} static inline bool is_const(_Py_UOpsSymType *expr) @@ -1280,10 +1253,10 @@ uop_abstract_interpret( ; while (curr < end && !op_is_end(curr->opcode)) { - if (!op_is_pure(curr->opcode) && - !op_is_specially_handled(curr->opcode) && + if (!(_PyUop_Flags[curr->opcode] & HAS_PURE_FLAG) && + !(_PyUop_Flags[curr->opcode] & HAS_SPECIAL_OPT_FLAG) && !op_is_bookkeeping(curr->opcode) && - !op_is_guard(curr->opcode)) { + !(_PyUop_Flags[curr->opcode] & HAS_GUARD_FLAG)) { DPRINTF(3, "Impure %s\n", _PyOpcode_uop_name[curr->opcode]); if (needs_clear_locals) { if (clear_locals_type_info(ctx) < 0) { @@ -1411,18 +1384,15 @@ _Py_uop_analyze_and_optimize( { OPT_STAT_INC(optimizer_attempts); _PyUOpInstruction temp_writebuffer[UOP_MAX_TRACE_WORKING_LENGTH]; - 
_PyUOpInstruction temp_readbuffer[UOP_MAX_TRACE_WORKING_LENGTH]; - - memcpy(temp_readbuffer, buffer, buffer_size * sizeof(_PyUOpInstruction)); - int err = remove_globals(frame, temp_readbuffer, buffer_size, dependencies); + int err = remove_globals(frame, buffer, buffer_size, dependencies); if (err <= 0) { goto error; } // Pass: Abstract interpretation and symbolic analysis int new_trace_len = uop_abstract_interpret( - (PyCodeObject *)frame->f_executable, temp_readbuffer, temp_writebuffer, + (PyCodeObject *)frame->f_executable, buffer, temp_writebuffer, buffer_size, curr_stacklen); if (new_trace_len < 0) { From 5ad62116feec1323e38c11b4438b95b15d1d028b Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 16:38:21 +0800 Subject: [PATCH 099/111] remove more memory allocations --- Python/optimizer_analysis.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 018d312c4cf706..a70ce20dfd9694 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -37,7 +37,7 @@ #define OVERALLOCATE_FACTOR 5 -#define ARENA_SIZE (UOP_MAX_TRACE_WORKING_LENGTH * OVERALLOCATE_FACTOR) +#define TY_ARENA_SIZE (UOP_MAX_TRACE_WORKING_LENGTH * OVERALLOCATE_FACTOR) // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) @@ -123,7 +123,7 @@ typedef struct _Py_UOpsAbstractFrame { typedef struct ty_arena { int ty_curr_number; int ty_max_number; - _Py_UOpsSymType arena[ARENA_SIZE]; + _Py_UOpsSymType arena[TY_ARENA_SIZE]; } ty_arena; typedef struct frequent_syms { @@ -161,11 +161,6 @@ abstractinterp_dealloc(_Py_UOpsAbstractInterpContext *self) { if (self == NULL) { return; - } - int curr = self->curr_frame_depth - 1; - while (curr >= 0) { - PyMem_Free(self->frames[curr].sym_consts); - curr--; } self->curr_frame_depth = 0; int tys = 
self->t_arena.ty_curr_number; @@ -218,7 +213,7 @@ abstractinterp_context_new(PyCodeObject *co, // Setup the arena for sym expressions. self->t_arena.ty_curr_number = 0; - self->t_arena.ty_max_number = ARENA_SIZE; + self->t_arena.ty_max_number = TY_ARENA_SIZE; // Frame setup @@ -253,7 +248,6 @@ abstractinterp_context_new(PyCodeObject *co, self->frame = NULL; } abstractinterp_dealloc(self); - PyMem_Free(frame->sym_consts); return NULL; } @@ -264,8 +258,9 @@ static inline _Py_UOpsSymType ** create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) { Py_ssize_t co_const_len = PyTuple_GET_SIZE(co_consts); - _Py_UOpsSymType **sym_consts = PyMem_New(_Py_UOpsSymType *, co_const_len); - if (sym_consts == NULL) { + _Py_UOpsSymType **sym_consts = ctx->limit - co_const_len; + ctx->limit -= co_const_len; + if (ctx->limit <= ctx->water_level) { return NULL; } for (Py_ssize_t i = 0; i < co_const_len; i++) { @@ -278,7 +273,6 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) return sym_consts; error: - PyMem_Free(sym_consts); return NULL; } @@ -423,10 +417,11 @@ ctx_frame_pop( _Py_UOpsAbstractFrame *frame = ctx->frame; ctx->water_level = frame->locals; - PyMem_Free(frame->sym_consts); ctx->curr_frame_depth--; assert(ctx->curr_frame_depth >= 1); ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; + + ctx->limit += frame->sym_consts_len; return 0; } From 73daf91f82df355848fec7c955efe99b682465c0 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 16:50:13 +0800 Subject: [PATCH 100/111] zero dynamic memory allocation --- Python/optimizer_analysis.c | 84 +++++++++++++++---------------------- 1 file changed, 33 insertions(+), 51 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a70ce20dfd9694..d1d3f4718b8eba 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -157,7 +157,7 @@ typedef struct 
_Py_UOpsAbstractInterpContext { } _Py_UOpsAbstractInterpContext; static void -abstractinterp_dealloc(_Py_UOpsAbstractInterpContext *self) +abstractinterp_fini(_Py_UOpsAbstractInterpContext *self) { if (self == NULL) { return; @@ -167,7 +167,6 @@ abstractinterp_dealloc(_Py_UOpsAbstractInterpContext *self) for (int i = 0; i < tys; i++) { Py_CLEAR(self->t_arena.arena[i].const_val); } - PyMem_Free(self); } @@ -187,22 +186,19 @@ static inline int frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, int locals_len, int curr_stacklen); -static _Py_UOpsAbstractInterpContext * -abstractinterp_context_new(PyCodeObject *co, - int curr_stacklen, - int ir_entries, - _PyUOpInstruction *new_writebuffer) +static int +abstractinterp_init( + _Py_UOpsAbstractInterpContext *self, + PyCodeObject *co, + int curr_stacklen, + int ir_entries, + _PyUOpInstruction *new_writebuffer +) { int locals_len = co->co_nlocalsplus; int stack_len = co->co_stacksize; _Py_UOpsAbstractFrame *frame = NULL; - _Py_UOpsAbstractInterpContext *self = NULL; - - self = PyMem_New(_Py_UOpsAbstractInterpContext, 1); - if (self == NULL) { - goto error; - } self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; self->water_level = self->locals_and_stack; @@ -220,14 +216,14 @@ abstractinterp_context_new(PyCodeObject *co, self->curr_frame_depth = 0; frame = frame_new(self, co->co_consts, stack_len, locals_len, curr_stacklen); if (frame == NULL) { - goto error; + return -1; } if (frame_push(self, frame, self->water_level, locals_len, curr_stacklen, stack_len + locals_len) < 0) { - goto error; + return -1; } if (frame_initalize(self, frame, locals_len, curr_stacklen) < 0) { - goto error; + return -1; } self->frame = frame; assert(frame != NULL); @@ -240,15 +236,6 @@ abstractinterp_context_new(PyCodeObject *co, self->emitter.curr_i = 0; self->emitter.writebuffer_end = new_writebuffer + ir_entries; - return self; - -error: - if (self != NULL) { - // Important so we don't double free 
them. - self->frame = NULL; - } - abstractinterp_dealloc(self); - return NULL; } static inline _Py_UOpsSymType* @@ -722,11 +709,6 @@ emit_const(uops_emitter *emitter, return 0; } -typedef enum { - ABSTRACT_INTERP_ERROR, - ABSTRACT_INTERP_NORMAL, -} AbstractInterpExitCodes; - #define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dval, result) \ do { \ @@ -997,17 +979,17 @@ uop_abstract_interpret_single_inst( assert(STACK_LEVEL() >= 0); if (emit_i(&ctx->emitter, new_inst) < 0) { - return ABSTRACT_INTERP_ERROR; + return -1; } - return ABSTRACT_INTERP_NORMAL; + return 0; pop_2_error_tier_two: STACK_SHRINK(1); STACK_SHRINK(1); error: DPRINTF(1, "Encountered error in abstract interpreter\n"); - return ABSTRACT_INTERP_ERROR; + return -1; } @@ -1226,17 +1208,17 @@ uop_abstract_interpret( { bool did_loop_peel = false; - _Py_UOpsAbstractInterpContext *ctx = NULL; + _Py_UOpsAbstractInterpContext ctx; - ctx = abstractinterp_context_new( + if (abstractinterp_init( + &ctx, co, curr_stacklen, - trace_len, new_trace); - if (ctx == NULL) { + trace_len, new_trace) < 0) { goto error; } _PyUOpInstruction *curr = NULL; _PyUOpInstruction *end = NULL; - AbstractInterpExitCodes status = ABSTRACT_INTERP_NORMAL; + int status = 0; bool needs_clear_locals = true; bool has_enough_space_to_duplicate_loop = true; int res = 0; @@ -1254,7 +1236,7 @@ uop_abstract_interpret( !(_PyUop_Flags[curr->opcode] & HAS_GUARD_FLAG)) { DPRINTF(3, "Impure %s\n", _PyOpcode_uop_name[curr->opcode]); if (needs_clear_locals) { - if (clear_locals_type_info(ctx) < 0) { + if (clear_locals_type_info(&ctx) < 0) { goto error; } } @@ -1266,9 +1248,9 @@ uop_abstract_interpret( status = uop_abstract_interpret_single_inst( - curr, end, ctx + curr, end, &ctx ); - if (status == ABSTRACT_INTERP_ERROR) { + if (status == -1) { goto error; } @@ -1281,13 +1263,13 @@ uop_abstract_interpret( // If we end in a loop, and we have a lot of space left, peel the loop for // poor man's loop invariant code motion for guards // 
https://en.wikipedia.org/wiki/Loop_splitting - has_enough_space_to_duplicate_loop = ((ctx->emitter.curr_i * 3) < - (int)(ctx->emitter.writebuffer_end - ctx->emitter.writebuffer)); + has_enough_space_to_duplicate_loop = ((ctx.emitter.curr_i * 3) < + (int)(ctx.emitter.writebuffer_end - ctx.emitter.writebuffer)); if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && has_enough_space_to_duplicate_loop) { OPT_STAT_INC(loop_body_duplication_attempts); did_loop_peel = true; - _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx->emitter.curr_i), 0, 0}; - if (emit_i(&ctx->emitter, jump_header) < 0) { + _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx.emitter.curr_i), 0, 0}; + if (emit_i(&ctx.emitter, jump_header) < 0) { goto error; } DPRINTF(1, "loop_peeling!\n"); @@ -1303,25 +1285,25 @@ uop_abstract_interpret( if (did_loop_peel) { OPT_STAT_INC(loop_body_duplication_successes); assert(curr->opcode == _JUMP_TO_TOP); - _PyUOpInstruction jump_abs = {_JUMP_ABSOLUTE, (ctx->emitter.curr_i), 0, 0}; - if (emit_i(&ctx->emitter, jump_abs) < 0) { + _PyUOpInstruction jump_abs = {_JUMP_ABSOLUTE, (ctx.emitter.curr_i), 0, 0}; + if (emit_i(&ctx.emitter, jump_abs) < 0) { goto error; } } else { - if (emit_i(&ctx->emitter, *curr) < 0) { + if (emit_i(&ctx.emitter, *curr) < 0) { goto error; } } } - res = ctx->emitter.curr_i; - abstractinterp_dealloc(ctx); + res = ctx.emitter.curr_i; + abstractinterp_fini(&ctx); return res; error: - abstractinterp_dealloc(ctx); + abstractinterp_fini(&ctx); return -1; } From cac361670f97a4623747688731966eaf7aa389ad Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 17:58:22 +0800 Subject: [PATCH 101/111] forgot return --- Python/optimizer_analysis.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index d1d3f4718b8eba..97281ca5c6ed99 100644 --- a/Python/optimizer_analysis.c +++ 
b/Python/optimizer_analysis.c @@ -33,7 +33,8 @@ #include #include -#define MAX_ABSTRACT_INTERP_SIZE 2048 +// Holds locals, stack, locals, stack ... co_consts (in that order) +#define MAX_ABSTRACT_INTERP_SIZE 4096 #define OVERALLOCATE_FACTOR 5 @@ -235,7 +236,7 @@ abstractinterp_init( self->emitter.writebuffer = new_writebuffer; self->emitter.curr_i = 0; self->emitter.writebuffer_end = new_writebuffer + ir_entries; - + return 0; } static inline _Py_UOpsSymType* From e7df93a97824e520832a20629d5778427f1d1219 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:12:32 +0800 Subject: [PATCH 102/111] streamline frame creation --- Python/optimizer_analysis.c | 234 ++++++++++++------------------------ 1 file changed, 75 insertions(+), 159 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 97281ca5c6ed99..0bb3c99c431e97 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -157,88 +157,6 @@ typedef struct _Py_UOpsAbstractInterpContext { _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; } _Py_UOpsAbstractInterpContext; -static void -abstractinterp_fini(_Py_UOpsAbstractInterpContext *self) -{ - if (self == NULL) { - return; - } - self->curr_frame_depth = 0; - int tys = self->t_arena.ty_curr_number; - for (int i = 0; i < tys; i++) { - Py_CLEAR(self->t_arena.arena[i].const_val); - } -} - - -static inline _Py_UOpsAbstractFrame * -frame_new(_Py_UOpsAbstractInterpContext *ctx, - PyObject *co_consts, int stack_len, int locals_len, - int curr_stacklen); -static inline int -frame_push(_Py_UOpsAbstractInterpContext *ctx, - _Py_UOpsAbstractFrame *frame, - _Py_UOpsSymType **localsplus_start, - int locals_len, - int curr_stacklen, - int total_len); - -static inline int -frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, - int locals_len, int curr_stacklen); - -static int -abstractinterp_init( - _Py_UOpsAbstractInterpContext 
*self, - PyCodeObject *co, - int curr_stacklen, - int ir_entries, - _PyUOpInstruction *new_writebuffer -) -{ - int locals_len = co->co_nlocalsplus; - int stack_len = co->co_stacksize; - _Py_UOpsAbstractFrame *frame = NULL; - - - self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; - self->water_level = self->locals_and_stack; - for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { - self->locals_and_stack[i] = NULL; - } - - - // Setup the arena for sym expressions. - self->t_arena.ty_curr_number = 0; - self->t_arena.ty_max_number = TY_ARENA_SIZE; - - // Frame setup - - self->curr_frame_depth = 0; - frame = frame_new(self, co->co_consts, stack_len, locals_len, curr_stacklen); - if (frame == NULL) { - return -1; - } - if (frame_push(self, frame, self->water_level, locals_len, curr_stacklen, - stack_len + locals_len) < 0) { - return -1; - } - if (frame_initalize(self, frame, locals_len, curr_stacklen) < 0) { - return -1; - } - self->frame = frame; - assert(frame != NULL); - - // IR and sym setup - self->frequent_syms.push_nulL_sym = NULL; - - // Emitter setup - self->emitter.writebuffer = new_writebuffer; - self->emitter.curr_i = 0; - self->emitter.writebuffer_end = new_writebuffer + ir_entries; - return 0; -} - static inline _Py_UOpsSymType* sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val); @@ -260,90 +178,117 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) } return sym_consts; -error: + error: return NULL; } +static inline _Py_UOpsSymType* sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx); -static inline _Py_UOpsSymType* -sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx); +// 0 on success, anything else is error. 
+static int +ctx_frame_push( + _Py_UOpsAbstractInterpContext *ctx, + PyCodeObject *co, + _Py_UOpsSymType **localsplus_start, + int curr_stackentries +) +{ + _Py_UOpsSymType **sym_consts = create_sym_consts(ctx, co->co_consts); + if (sym_consts == NULL) { + return -1; + } + assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); + _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; + ctx->curr_frame_depth++; -static void -sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym); + frame->sym_consts = sym_consts; + frame->sym_consts_len = (int)Py_SIZE(co->co_consts); + frame->stack_len = co->co_stacksize; + frame->locals_len = co->co_nlocalsplus; -/* - * The reason why we have a separate frame_push and frame_initialize is to mimic - * what CPython's frame push does. This also prepares for inlining. - * */ -static inline int -frame_push(_Py_UOpsAbstractInterpContext *ctx, - _Py_UOpsAbstractFrame *frame, - _Py_UOpsSymType **localsplus_start, - int locals_len, - int curr_stacklen, - int total_len) -{ frame->locals = localsplus_start; - frame->stack = frame->locals + locals_len; - frame->stack_pointer = frame->stack + curr_stacklen; - ctx->water_level = localsplus_start + total_len; + frame->stack = frame->locals + co->co_nlocalsplus; + frame->stack_pointer = frame->stack + curr_stackentries; + ctx->water_level = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); if (ctx->water_level > ctx->limit) { return -1; } - return 0; -} -static inline int -frame_initalize(_Py_UOpsAbstractInterpContext *ctx, _Py_UOpsAbstractFrame *frame, - int locals_len, int curr_stacklen) -{ + // Initialize with the initial state of all local variables - for (int i = 0; i < locals_len; i++) { + for (int i = 0; i < co->co_nlocalsplus; i++) { _Py_UOpsSymType *local = sym_init_unknown(ctx); if (local == NULL) { - goto error; + return -1; } frame->locals[i] = local; } // Initialize the stack as well - for (int i = 0; i < curr_stacklen; i++) { + for 
(int i = 0; i < curr_stackentries; i++) { _Py_UOpsSymType *stackvar = sym_init_unknown(ctx); if (stackvar == NULL) { - goto error; + return -1; } frame->stack[i] = stackvar; } + ctx->frame = frame; return 0; +} -error: - return -1; +static void +abstractinterp_fini(_Py_UOpsAbstractInterpContext *self) +{ + if (self == NULL) { + return; + } + self->curr_frame_depth = 0; + int tys = self->t_arena.ty_curr_number; + for (int i = 0; i < tys; i++) { + Py_CLEAR(self->t_arena.arena[i].const_val); + } } -static inline _Py_UOpsAbstractFrame * -frame_new(_Py_UOpsAbstractInterpContext *ctx, - PyObject *co_consts, int stack_len, int locals_len, - int curr_stacklen) +static int +abstractinterp_init( + _Py_UOpsAbstractInterpContext *self, + PyCodeObject *co, + int curr_stacklen, + int ir_entries, + _PyUOpInstruction *new_writebuffer +) { - _Py_UOpsSymType **sym_consts = create_sym_consts(ctx, co_consts); - if (sym_consts == NULL) { - return NULL; + + + self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; + self->water_level = self->locals_and_stack; + for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { + self->locals_and_stack[i] = NULL; } - assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); - _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; - ctx->curr_frame_depth++; - frame->sym_consts = sym_consts; - frame->sym_consts_len = (int)Py_SIZE(co_consts); - frame->stack_len = stack_len; - frame->locals_len = locals_len; + // Setup the arena for sym expressions. 
+ self->t_arena.ty_curr_number = 0; + self->t_arena.ty_max_number = TY_ARENA_SIZE; + + // Frame setup + + self->curr_frame_depth = 0; + ctx_frame_push(self, co, self->water_level, curr_stacklen); - return frame; + // IR and sym setup + self->frequent_syms.push_nulL_sym = NULL; + + // Emitter setup + self->emitter.writebuffer = new_writebuffer; + self->emitter.curr_i = 0; + self->emitter.writebuffer_end = new_writebuffer + ir_entries; + return 0; } + static inline bool sym_is_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ); static inline uint64_t @@ -368,35 +313,6 @@ extract_func_from_sym(_Py_UOpsSymType *callable_sym) } -// 0 on success, anything else is error. -static int -ctx_frame_push( - _Py_UOpsAbstractInterpContext *ctx, - PyCodeObject *co, - _Py_UOpsSymType **localsplus_start -) -{ - _Py_UOpsAbstractFrame *frame = frame_new(ctx, - co->co_consts, co->co_stacksize, - co->co_nlocalsplus, - 0); - if (frame == NULL) { - return -1; - } - if (frame_push(ctx, frame, localsplus_start, co->co_nlocalsplus, 0, - co->co_nlocalsplus + co->co_stacksize) < 0) { - return -1; - } - if (frame_initalize(ctx, frame, co->co_nlocalsplus, 0) < 0) { - return -1; - } - - ctx->frame = frame; - - - return 0; -} - static int ctx_frame_pop( _Py_UOpsAbstractInterpContext *ctx @@ -928,7 +844,7 @@ uop_abstract_interpret_single_inst( // This is _PUSH_FRAME's stack effect STACK_SHRINK(1); ctx->frame->stack_pointer = stack_pointer; - if (ctx_frame_push(ctx, co, ctx->water_level) != 0){ + if (ctx_frame_push(ctx, co, ctx->water_level, 0) != 0){ goto error; } stack_pointer = ctx->frame->stack_pointer; From 65fae9aceceee8db031e5cb686cca7fc64a5c5cd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:15:27 +0800 Subject: [PATCH 103/111] more cleanup --- Python/optimizer_analysis.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0bb3c99c431e97..132ce6b0ae0ff3 
100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -260,21 +260,17 @@ abstractinterp_init( _PyUOpInstruction *new_writebuffer ) { - - self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; self->water_level = self->locals_and_stack; for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { self->locals_and_stack[i] = NULL; } - // Setup the arena for sym expressions. self->t_arena.ty_curr_number = 0; self->t_arena.ty_max_number = TY_ARENA_SIZE; // Frame setup - self->curr_frame_depth = 0; ctx_frame_push(self, co, self->water_level, curr_stacklen); @@ -403,7 +399,6 @@ sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) ((PyFunctionObject *)(obj))->func_version); } - } @@ -1163,7 +1158,6 @@ uop_abstract_interpret( needs_clear_locals = true; } - status = uop_abstract_interpret_single_inst( curr, end, &ctx ); From 3fec95978f96e1ffcadba4678c26a2b21141e74c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:32:06 +0800 Subject: [PATCH 104/111] fix bug in return code --- Python/optimizer_analysis.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 132ce6b0ae0ff3..94cd83f2d949b7 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -172,14 +172,12 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) for (Py_ssize_t i = 0; i < co_const_len; i++) { _Py_UOpsSymType *res = sym_init_const(ctx, PyTuple_GET_ITEM(co_consts, i)); if (res == NULL) { - goto error; + return NULL; } sym_consts[i] = res; } return sym_consts; - error: - return NULL; } static inline _Py_UOpsSymType* sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx); @@ -210,7 +208,7 @@ ctx_frame_push( frame->stack = frame->locals + co->co_nlocalsplus; frame->stack_pointer = frame->stack + curr_stackentries; ctx->water_level = localsplus_start + (co->co_nlocalsplus + 
co->co_stacksize); - if (ctx->water_level > ctx->limit) { + if (ctx->water_level >= ctx->limit) { return -1; } @@ -262,9 +260,11 @@ abstractinterp_init( { self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; self->water_level = self->locals_and_stack; +#if Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { self->locals_and_stack[i] = NULL; } +#endif // Setup the arena for sym expressions. self->t_arena.ty_curr_number = 0; @@ -272,7 +272,9 @@ abstractinterp_init( // Frame setup self->curr_frame_depth = 0; - ctx_frame_push(self, co, self->water_level, curr_stacklen); + if (ctx_frame_push(self, co, self->water_level, curr_stacklen) < 0) { + return -1; + } // IR and sym setup self->frequent_syms.push_nulL_sym = NULL; From dde7d12e63945cd25171900ee26553ec95a63b5c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 19:32:33 +0800 Subject: [PATCH 105/111] fix on MSVC --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 94cd83f2d949b7..4178a9319ed09a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -260,7 +260,7 @@ abstractinterp_init( { self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; self->water_level = self->locals_and_stack; -#if Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. +#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. 
for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { self->locals_and_stack[i] = NULL; } From 04c902b27f114f3fdccf4e4a48e41d7d3bd9be27 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 19:49:19 +0800 Subject: [PATCH 106/111] Add back constant evaluation --- Python/abstract_interp_cases.c.h | 104 +++++++++++++++ .../tier2_abstract_generator.py | 125 +++++++++--------- 2 files changed, 165 insertions(+), 64 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 6d7b93e3c85585..26f8793ab56020 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -148,6 +148,19 @@ _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyLong_CheckExact(left)) goto error; + if (!PyLong_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYLONG_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -269,6 +282,19 @@ _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyFloat_CheckExact(left)) goto error; + if (!PyFloat_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYFLOAT_TYPE, (uint32_t)0)) { 
DPRINTF(2, "type propagation eliminated guard\n"); @@ -393,6 +419,19 @@ _Py_UOpsSymType *__left_; __right_ = stack_pointer[-1]; __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyUnicode_CheckExact(left)) goto error; + if (!PyUnicode_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYUNICODE_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -923,6 +962,18 @@ _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t type_version = (uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + PyTypeObject *tp = Py_TYPE(owner); + assert(type_version != 0); + if (tp->tp_version_tag != type_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -1076,6 +1127,18 @@ case _GUARD_DORV_VALUES: { _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); + if (!_PyDictOrValues_IsValues(dorv)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ 
-1476,6 +1539,18 @@ _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + PyTypeObject *owner_cls = Py_TYPE(owner); + PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; + if (owner_heap_type->ht_cached_keys->dk_version != keys_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -1575,6 +1650,19 @@ _Py_UOpsSymType *__callable_; __null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; + // Constant evaluation + if (is_const(__callable_) && is_const(__null_)) { + PyObject *null; + PyObject *callable; + callable = get_const(__callable_); + null = get_const(__null_); + if (null != NULL) goto error; + if (Py_TYPE(callable) != &PyMethod_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__null_, NULL_TYPE, (uint32_t)0)) { DPRINTF(2, "type propagation eliminated guard\n"); @@ -1607,6 +1695,22 @@ __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + // Constant evaluation + if (is_const(__callable_) && is_const(__self_or_null_)) { + PyObject *self_or_null; + PyObject *callable; + callable = get_const(__callable_); + self_or_null = get_const(__self_or_null_); + if (!PyFunction_Check(callable)) goto error; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto error; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if 
(code->co_argcount != oparg + (self_or_null != NULL)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } // Type guard elimination if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)) { DPRINTF(2, "type propagation eliminated guard\n"); diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 9c0ccc1479298d..875c94f6792ed7 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -282,74 +282,71 @@ def _write_body_abstract_interp_guard_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") - # Does any of the output specify types? If so, we can eliminate the guard based on types. - can_type_eliminate = any(output_var.type_prop for output_var in mangled_uop.stack.outputs) - - # Cannot type eliminate -- try constant evaluation instead. 
- # We don't need to try - if not can_type_eliminate: - out.emit("// Constant evaluation\n") - predicates_str = " && ".join( - [ - f"is_const({var.name})" - for var in mangled_uop.stack.inputs - if var.name not in UNUSED - ] - ) - if predicates_str: - out.emit(f"if ({predicates_str}) {{\n") - declare_variables(uop, out, default_type="PyObject *") - for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): - if var.name in UNUSED: - continue - out.emit(f"{var.name} = get_const({mangled_var.name});\n") - emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) - out.emit("\n") - # Guard elimination - out.emit('DPRINTF(3, "const eliminated guard\\n");\n') - out.emit("new_inst.opcode = _NOP;\n") - out.emit("break;\n") - out.emit("}\n") - else: - # If the input types already match, eliminate the guard - # Read the cache information to check the auxiliary type information - predicates = [] - propagates = [] - - assert len(mangled_uop.stack.outputs) == len( - mangled_uop.stack.inputs - ), "guards must have same number of args" - assert [ - output == input_ - for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs) - ], "guards must forward their stack values" - for output_var in mangled_uop.stack.outputs: - if output_var.name in UNUSED: + out.emit("// Constant evaluation\n") + predicates_str = " && ".join( + [ + f"is_const({var.name})" + for var in mangled_uop.stack.inputs + if var.name not in UNUSED + ] + ) + if predicates_str: + out.emit(f"if ({predicates_str}) {{\n") + declare_variables(uop, out, default_type="PyObject *") + for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): + if var.name in UNUSED: continue - if (typ := output_var.type_prop) is not None: - typname, aux = typ - aux = "0" if aux is None else aux - # Check that the input type information match (including auxiliary info) - predicates.append( - f"sym_matches_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" - ) - # 
Propagate mode - set the types - propagates.append( - f"sym_set_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" - ) - - out.emit("// Type guard elimination\n") - out.emit(f"if ({' && '.join(predicates)}) {{\n") - out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') + out.emit(f"{var.name} = get_const({mangled_var.name});\n") + emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) + out.emit("\n") + # Guard elimination + out.emit('DPRINTF(3, "const eliminated guard\\n");\n') out.emit("new_inst.opcode = _NOP;\n") out.emit("break;\n") out.emit("}\n") - # Else we need the guard - out.emit("else {\n") - out.emit("// Type propagation\n") - for prop in propagates: - out.emit(f"{prop};\n") - out.emit("}\n") + + # Does the input specify typed inputs? + if not any(output_var.type_prop for output_var in mangled_uop.stack.outputs): + return + # If the input types already match, eliminate the guard + # Read the cache information to check the auxiliary type information + predicates = [] + propagates = [] + + assert len(mangled_uop.stack.outputs) == len( + mangled_uop.stack.inputs + ), "guards must have same number of args" + assert [ + output == input_ + for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs) + ], "guards must forward their stack values" + for output_var in mangled_uop.stack.outputs: + if output_var.name in UNUSED: + continue + if (typ := output_var.type_prop) is not None: + typname, aux = typ + aux = "0" if aux is None else aux + # Check that the input type information match (including auxiliary info) + predicates.append( + f"sym_matches_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" + ) + # Propagate mode - set the types + propagates.append( + f"sym_set_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" + ) + + out.emit("// Type guard elimination\n") + out.emit(f"if ({' && '.join(predicates)}) {{\n") + out.emit('DPRINTF(2, "type propagation 
eliminated guard\\n");\n') + out.emit("new_inst.opcode = _NOP;\n") + out.emit("break;\n") + out.emit("}\n") + # Else we need the guard + out.emit("else {\n") + out.emit("// Type propagation\n") + for prop in propagates: + out.emit(f"{prop};\n") + out.emit("}\n") def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) -> None: From 05e93c27ef75c9efd16d2d0b2562655c09029b66 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 20:59:14 +0800 Subject: [PATCH 107/111] cleanup --- Python/abstract_interp_cases.c.h | 58 +++++++-------- Python/optimizer_analysis.c | 19 +++-- .../tier2_abstract_generator.py | 73 ++++++++----------- 3 files changed, 68 insertions(+), 82 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 26f8793ab56020..bfedfb6da60581 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -190,6 +190,8 @@ right = get_const(__right_); STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); @@ -224,6 +226,8 @@ right = get_const(__right_); STAT_INC(BINARY_OP, hit); res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); @@ -258,6 +262,8 @@ right = get_const(__right_); STAT_INC(BINARY_OP, hit); res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (res == NULL) goto pop_2_error_tier_two; __res_ = 
_Py_UOpsSymType_New(ctx, (PyObject *)res); @@ -461,6 +467,8 @@ right = get_const(__right_); STAT_INC(BINARY_OP, hit); res = PyUnicode_Concat(left, right); + _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); + _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res == NULL) goto pop_2_error_tier_two; __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); @@ -752,11 +760,10 @@ case _LOAD_GLOBAL: { _Py_UOpsSymType *__res_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; - __null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[0] = __res_; if (oparg & 1) stack_pointer[1] = __null_; stack_pointer += 1 + (oparg & 1); @@ -774,11 +781,10 @@ case _LOAD_GLOBAL_MODULE: { _Py_UOpsSymType *__res_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; - __null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[0] = __res_; if (oparg & 1) stack_pointer[1] = __null_; stack_pointer += 1 + (oparg & 1); @@ -788,11 +794,10 @@ case _LOAD_GLOBAL_BUILTINS: { _Py_UOpsSymType *__res_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __res_ = sym_init_unknown(ctx); if(__res_ == NULL) goto error; - __null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[0] = __res_; if (oparg & 1) stack_pointer[1] = __null_; stack_pointer += 1 + (oparg & 1); @@ -961,8 +966,8 @@ case _GUARD_TYPE_VERSION: { _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; - uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); if (is_const(__owner_)) 
{ PyObject *owner; owner = get_const(__owner_); @@ -994,11 +999,10 @@ case _LOAD_ATTR_INSTANCE_VALUE: { _Py_UOpsSymType *__attr_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; - __null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[-1] = __attr_; if (oparg & 1) stack_pointer[0] = __null_; stack_pointer += (oparg & 1); @@ -1008,8 +1012,8 @@ case _CHECK_ATTR_MODULE: { _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; - uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1028,11 +1032,10 @@ case _LOAD_ATTR_MODULE: { _Py_UOpsSymType *__attr_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; - __null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[-1] = __attr_; if (oparg & 1) stack_pointer[0] = __null_; stack_pointer += (oparg & 1); @@ -1062,11 +1065,10 @@ case _LOAD_ATTR_WITH_HINT: { _Py_UOpsSymType *__attr_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; - __null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[-1] = __attr_; if (oparg & 1) stack_pointer[0] = __null_; stack_pointer += (oparg & 1); @@ -1076,11 +1078,10 @@ case _LOAD_ATTR_SLOT: { _Py_UOpsSymType *__attr_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; - 
__null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[-1] = __attr_; if (oparg & 1) stack_pointer[0] = __null_; stack_pointer += (oparg & 1); @@ -1090,8 +1091,8 @@ case _CHECK_ATTR_CLASS: { _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; - uint32_t type_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1109,11 +1110,10 @@ case _LOAD_ATTR_CLASS: { _Py_UOpsSymType *__attr_; _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } __attr_ = sym_init_unknown(ctx); if(__attr_ == NULL) goto error; - __null_ = sym_init_unknown(ctx); - if(__null_ == NULL) goto error; - sym_set_type(__null_, NULL_TYPE, 0); stack_pointer[-1] = __attr_; if (oparg & 1) stack_pointer[0] = __null_; stack_pointer += (oparg & 1); @@ -1538,8 +1538,8 @@ case _GUARD_KEYS_VERSION: { _Py_UOpsSymType *__owner_; __owner_ = stack_pointer[-1]; - uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation + uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); if (is_const(__owner_)) { PyObject *owner; owner = get_const(__owner_); @@ -1694,8 +1694,8 @@ _Py_UOpsSymType *__callable_; __self_or_null_ = stack_pointer[-1 - oparg]; __callable_ = stack_pointer[-2 - oparg]; - uint32_t func_version = (uint32_t)CURRENT_OPERAND(); // Constant evaluation + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); if (is_const(__callable_) && is_const(__self_or_null_)) { PyObject *self_or_null; PyObject *callable; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4178a9319ed09a..ba280fa102fae6 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -495,11 +495,17 @@ is_const(_Py_UOpsSymType *expr) } static inline PyObject * -get_const(_Py_UOpsSymType *expr) +get_const_borrow(_Py_UOpsSymType 
*expr) { return expr->const_val; } +static inline PyObject * +get_const(_Py_UOpsSymType *expr) +{ + return Py_NewRef(expr->const_val); +} + static int clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { @@ -544,16 +550,9 @@ op_is_zappable(int opcode) switch(opcode) { case _SET_IP: case _CHECK_VALIDITY: - case _LOAD_CONST_INLINE: - case _LOAD_CONST_INLINE_BORROW: - case _LOAD_CONST_INLINE_WITH_NULL: - case _LOAD_CONST_INLINE_BORROW_WITH_NULL: - case _LOAD_CONST: - case _LOAD_FAST: - case _NOP: return true; default: - return false; + return _PyUop_Flags[opcode] & HAS_PURE_FLAG; } } @@ -731,7 +730,7 @@ uop_abstract_interpret_single_inst( ctx, oparg); assert(is_const(PEEK(1))); // Peephole: inline constants. - PyObject *val = get_const(PEEK(1)); + PyObject *val = get_const_borrow(PEEK(1)); new_inst.opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; if (new_inst.opcode == _LOAD_CONST_INLINE) { Py_INCREF(val); diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 875c94f6792ed7..32c7a52a64f2ad 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -51,6 +51,8 @@ } +MANGLED_NULL = "__null_" + def declare_variables( uop: Uop, out: CWriter, @@ -75,6 +77,9 @@ def declare_variables( out.emit(f"{type}{var.name} = NULL;\n") else: out.emit(f"{type}{var.name};\n") + if var.name == MANGLED_NULL and not var.peek: + out.emit(f"{var.name} = sym_init_push_null(ctx);\n") + out.emit(f"if ({var.name} == NULL) {{ goto error; }}\n") for var in uop.stack.outputs: if skip_peeks and var.peek: continue @@ -89,6 +94,9 @@ def declare_variables( out.emit(f"{type}{var.name} = NULL;\n") else: out.emit(f"{type}{var.name};\n") + if var.name == MANGLED_NULL and not var.peek: + out.emit(f"{var.name} = sym_init_push_null(ctx);\n") + out.emit(f"if ({var.name} == NULL) {{ goto error; }}\n") def tier2_replace_deopt( @@ -106,31 +114,9 @@ 
def tier2_replace_deopt( out.emit(") goto error;\n") -def tier2_replace_decref_specialized( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - unused: Stack, - inst: Instruction | None, -) -> None: - parens = 1 - next(tkn_iter) # LPAREN - for tkn in tkn_iter: - if tkn.kind == "LPAREN": - parens += 1 - if tkn.kind == "RPAREN": - parens -= 1 - if parens == 0: - break - next(tkn_iter) # SEMICOLON - - TIER2_REPLACEMENT_FUNCTIONS = REPLACEMENT_FUNCTIONS.copy() TIER2_REPLACEMENT_FUNCTIONS["ERROR_IF"] = tier2_replace_error TIER2_REPLACEMENT_FUNCTIONS["DEOPT_IF"] = tier2_replace_deopt -TIER2_REPLACEMENT_FUNCTIONS["_Py_DECREF_SPECIALIZED"] = tier2_replace_decref_specialized - def _write_body_abstract_interp_impure_uop( mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack @@ -142,12 +128,11 @@ def _write_body_abstract_interp_impure_uop( continue if var.size == "1": - out.emit(f"{var.name} = sym_init_unknown(ctx);\n") - out.emit(f"if({var.name} == NULL) goto error;\n") - if var.name in ("null", "__null_"): - out.emit(f"sym_set_type({var.name}, NULL_TYPE, 0);\n") - elif var.type_prop: - out.emit(f"sym_set_type({var.name}, {var.type_prop[0]}, 0);\n") + if var.name != MANGLED_NULL: + out.emit(f"{var.name} = sym_init_unknown(ctx);\n") + out.emit(f"if({var.name} == NULL) goto error;\n") + if var.type_prop: + out.emit(f"sym_set_type({var.name}, {var.type_prop[0]}, 0);\n") else: # See UNPACK_SEQUENCE for when we need this. 
out.emit( @@ -202,6 +187,17 @@ def new_sym( ) +def declare_caches(uop: Uop, out: CWriter): + for cache in uop.caches: + if cache.name not in UNUSED: + if cache.size == 4: + type = cast = "PyObject *" + else: + type = f"uint{cache.size*16}_t " + cast = f"uint{cache.size*16}_t" + out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") + + def _write_body_abstract_interp_pure_uop( mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack ) -> None: @@ -209,17 +205,13 @@ def _write_body_abstract_interp_pure_uop( mangled_uop.stack.inputs ) - # uop is mandatory - we cannot const evaluate it + # uop is non-trivial - we cannot const evaluate it if uop.name in NO_CONST_OR_TYPE_EVALUATE: for in_ in mangled_uop.stack.inputs: out.emit(f"(void){in_.name};\n") return - assert ( - len(uop.stack.outputs) == 1 - ), f"Currently we only support 1 stack output for pure ops: {uop}" - - # Constant prop only handles one output, and no variadic inputs. + # Constant prop handled no variadic inputs. # Perhaps in the future we can support these. 
if all(input.size == "1" for input in uop.stack.inputs): # We can try a constant evaluation @@ -232,6 +224,9 @@ def _write_body_abstract_interp_pure_uop( ] ) + if predicates: + declare_caches(uop, out) + out.emit(f"if ({predicates or 0}) {{\n") declare_variables(uop, out, default_type="PyObject *") for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): @@ -273,15 +268,6 @@ def _write_body_abstract_interp_guard_uop( if uop.name in NO_CONST_OR_TYPE_EVALUATE: return - for cache in uop.caches: - if cache.name not in UNUSED: - if cache.size == 4: - type = cast = "PyObject *" - else: - type = f"uint{cache.size*16}_t " - cast = f"uint{cache.size*16}_t" - out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") - out.emit("// Constant evaluation\n") predicates_str = " && ".join( [ @@ -291,6 +277,7 @@ def _write_body_abstract_interp_guard_uop( ] ) if predicates_str: + declare_caches(uop, out) out.emit(f"if ({predicates_str}) {{\n") declare_variables(uop, out, default_type="PyObject *") for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): From 55f9bcb464a282f252f0f2b74a163d7a3c5c137a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 21:01:55 +0800 Subject: [PATCH 108/111] add type annotation --- Tools/cases_generator/tier2_abstract_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py index 32c7a52a64f2ad..73418c4269906b 100644 --- a/Tools/cases_generator/tier2_abstract_generator.py +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -187,7 +187,7 @@ def new_sym( ) -def declare_caches(uop: Uop, out: CWriter): +def declare_caches(uop: Uop, out: CWriter) -> None: for cache in uop.caches: if cache.name not in UNUSED: if cache.size == 4: From 0726766dc64a8d83fbfd49d5e4c809c453bed38e Mon Sep 17 00:00:00 2001 From: Ken Jin 
<28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 21:17:46 +0800 Subject: [PATCH 109/111] make const zapping more error-proof --- Include/internal/pycore_uop_metadata.h | 211 +++++++++++++++++- Python/bytecodes.c | 8 +- Python/optimizer_analysis.c | 36 +-- .../cases_generator/uop_metadata_generator.py | 11 + 4 files changed, 233 insertions(+), 33 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 40a1e12b515e33..eaf85465a26623 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -204,10 +204,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_JUMP_ABSOLUTE] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG, [_JUMP_ABSOLUTE_HEADER] = 0, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, - [_LOAD_CONST_INLINE] = HAS_SPECIAL_OPT_FLAG, - [_LOAD_CONST_INLINE_BORROW] = HAS_SPECIAL_OPT_FLAG, - [_LOAD_CONST_INLINE_WITH_NULL] = HAS_SPECIAL_OPT_FLAG, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_CHECK_GLOBALS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, [_CHECK_BUILTINS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, @@ -414,6 +414,209 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { }; #endif // NEED_OPCODE_METADATA +extern const int _PyUop_NetStackEffect[MAX_UOP_ID+1]; +#ifdef NEED_OPCODE_METADATA +const int _PyUop_NetStackEffect[MAX_UOP_ID+1] = { + [_NOP] = 0, + [_RESUME_CHECK] = 0, + [_LOAD_FAST_CHECK] = 1, + [_LOAD_FAST] = 1, + [_LOAD_FAST_AND_CLEAR] = 1, + [_LOAD_FAST_LOAD_FAST] = 2, + [_LOAD_CONST] = 1, + [_STORE_FAST] = -1, + [_STORE_FAST_LOAD_FAST] = 0, + 
[_STORE_FAST_STORE_FAST] = -2, + [_POP_TOP] = -1, + [_PUSH_NULL] = 1, + [_END_SEND] = -1, + [_UNARY_NEGATIVE] = 0, + [_UNARY_NOT] = 0, + [_TO_BOOL] = 0, + [_TO_BOOL_BOOL] = 0, + [_TO_BOOL_INT] = 0, + [_TO_BOOL_LIST] = 0, + [_TO_BOOL_NONE] = 0, + [_TO_BOOL_STR] = 0, + [_TO_BOOL_ALWAYS_TRUE] = 0, + [_UNARY_INVERT] = 0, + [_GUARD_BOTH_INT] = 0, + [_BINARY_OP_MULTIPLY_INT] = -1, + [_BINARY_OP_ADD_INT] = -1, + [_BINARY_OP_SUBTRACT_INT] = -1, + [_GUARD_BOTH_FLOAT] = 0, + [_BINARY_OP_MULTIPLY_FLOAT] = -1, + [_BINARY_OP_ADD_FLOAT] = -1, + [_BINARY_OP_SUBTRACT_FLOAT] = -1, + [_GUARD_BOTH_UNICODE] = 0, + [_BINARY_OP_ADD_UNICODE] = -1, + [_BINARY_SUBSCR] = -1, + [_BINARY_SLICE] = -2, + [_STORE_SLICE] = -4, + [_BINARY_SUBSCR_LIST_INT] = -1, + [_BINARY_SUBSCR_STR_INT] = -1, + [_BINARY_SUBSCR_TUPLE_INT] = -1, + [_BINARY_SUBSCR_DICT] = -1, + [_LIST_APPEND] = -1, + [_SET_ADD] = -1, + [_STORE_SUBSCR] = -3, + [_STORE_SUBSCR_LIST_INT] = -3, + [_STORE_SUBSCR_DICT] = -3, + [_DELETE_SUBSCR] = -2, + [_CALL_INTRINSIC_1] = 0, + [_CALL_INTRINSIC_2] = -1, + [_POP_FRAME] = -1, + [_GET_AITER] = 0, + [_GET_ANEXT] = 1, + [_GET_AWAITABLE] = 0, + [_POP_EXCEPT] = -1, + [_LOAD_ASSERTION_ERROR] = 1, + [_LOAD_BUILD_CLASS] = 1, + [_STORE_NAME] = -1, + [_DELETE_NAME] = 0, + [_UNPACK_SEQUENCE] = 0, + [_UNPACK_SEQUENCE_TWO_TUPLE] = 0, + [_UNPACK_SEQUENCE_TUPLE] = 0, + [_UNPACK_SEQUENCE_LIST] = 0, + [_UNPACK_EX] = 2, + [_STORE_ATTR] = -2, + [_DELETE_ATTR] = -1, + [_STORE_GLOBAL] = -1, + [_DELETE_GLOBAL] = 0, + [_LOAD_LOCALS] = 1, + [_LOAD_FROM_DICT_OR_GLOBALS] = 0, + [_LOAD_NAME] = 1, + [_LOAD_GLOBAL] = 2, + [_GUARD_GLOBALS_VERSION] = 0, + [_GUARD_BUILTINS_VERSION] = 0, + [_LOAD_GLOBAL_MODULE] = 2, + [_LOAD_GLOBAL_BUILTINS] = 2, + [_DELETE_FAST] = 0, + [_MAKE_CELL] = 0, + [_DELETE_DEREF] = 0, + [_LOAD_FROM_DICT_OR_DEREF] = 0, + [_LOAD_DEREF] = 1, + [_STORE_DEREF] = -1, + [_COPY_FREE_VARS] = 0, + [_BUILD_STRING] = 0, + [_BUILD_TUPLE] = 0, + [_BUILD_LIST] = 0, + [_LIST_EXTEND] = -1, + [_SET_UPDATE] = -1, + 
[_BUILD_SET] = 0, + [_BUILD_MAP] = 0, + [_SETUP_ANNOTATIONS] = 0, + [_BUILD_CONST_KEY_MAP] = -1, + [_DICT_UPDATE] = -1, + [_DICT_MERGE] = -1, + [_MAP_ADD] = -2, + [_LOAD_SUPER_ATTR_ATTR] = -1, + [_LOAD_SUPER_ATTR_METHOD] = -1, + [_LOAD_ATTR] = 1, + [_GUARD_TYPE_VERSION] = 0, + [_CHECK_MANAGED_OBJECT_HAS_VALUES] = 0, + [_LOAD_ATTR_INSTANCE_VALUE] = 1, + [_CHECK_ATTR_MODULE] = 0, + [_LOAD_ATTR_MODULE] = 1, + [_CHECK_ATTR_WITH_HINT] = 0, + [_LOAD_ATTR_WITH_HINT] = 1, + [_LOAD_ATTR_SLOT] = 1, + [_CHECK_ATTR_CLASS] = 0, + [_LOAD_ATTR_CLASS] = 1, + [_GUARD_DORV_VALUES] = 0, + [_STORE_ATTR_INSTANCE_VALUE] = -2, + [_STORE_ATTR_SLOT] = -2, + [_COMPARE_OP] = -1, + [_COMPARE_OP_FLOAT] = -1, + [_COMPARE_OP_INT] = -1, + [_COMPARE_OP_STR] = -1, + [_IS_OP] = -1, + [_CONTAINS_OP] = -1, + [_CHECK_EG_MATCH] = 0, + [_CHECK_EXC_MATCH] = 0, + [_IS_NONE] = 0, + [_GET_LEN] = 1, + [_MATCH_CLASS] = -2, + [_MATCH_MAPPING] = 1, + [_MATCH_SEQUENCE] = 1, + [_MATCH_KEYS] = 1, + [_GET_ITER] = 0, + [_GET_YIELD_FROM_ITER] = 0, + [_FOR_ITER_TIER_TWO] = 1, + [_ITER_CHECK_LIST] = 0, + [_GUARD_NOT_EXHAUSTED_LIST] = 0, + [_ITER_NEXT_LIST] = 1, + [_ITER_CHECK_TUPLE] = 0, + [_GUARD_NOT_EXHAUSTED_TUPLE] = 0, + [_ITER_NEXT_TUPLE] = 1, + [_ITER_CHECK_RANGE] = 0, + [_GUARD_NOT_EXHAUSTED_RANGE] = 0, + [_ITER_NEXT_RANGE] = 1, + [_BEFORE_ASYNC_WITH] = 1, + [_BEFORE_WITH] = 1, + [_WITH_EXCEPT_START] = 1, + [_PUSH_EXC_INFO] = 1, + [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = 0, + [_GUARD_KEYS_VERSION] = 0, + [_LOAD_ATTR_METHOD_WITH_VALUES] = 1, + [_LOAD_ATTR_METHOD_NO_DICT] = 1, + [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = 1, + [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = 1, + [_CHECK_ATTR_METHOD_LAZY_DICT] = 0, + [_LOAD_ATTR_METHOD_LAZY_DICT] = 1, + [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = 0, + [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = 0, + [_CHECK_PEP_523] = 0, + [_CHECK_FUNCTION_EXACT_ARGS] = 0, + [_CHECK_STACK_SPACE] = 0, + [_INIT_CALL_PY_EXACT_ARGS] = -2, + [_PUSH_FRAME] = 0, + [_CALL_TYPE_1] = -2, + [_CALL_STR_1] = -2, + 
[_CALL_TUPLE_1] = -2, + [_EXIT_INIT_CHECK] = -1, + [_CALL_BUILTIN_CLASS] = -2, + [_CALL_BUILTIN_O] = -2, + [_CALL_BUILTIN_FAST] = -2, + [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = -2, + [_CALL_LEN] = -2, + [_CALL_ISINSTANCE] = -2, + [_CALL_METHOD_DESCRIPTOR_O] = -2, + [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = -2, + [_CALL_METHOD_DESCRIPTOR_NOARGS] = -2, + [_CALL_METHOD_DESCRIPTOR_FAST] = -2, + [_MAKE_FUNCTION] = 0, + [_SET_FUNCTION_ATTRIBUTE] = -1, + [_BUILD_SLICE] = -2, + [_CONVERT_VALUE] = 0, + [_FORMAT_SIMPLE] = 0, + [_FORMAT_WITH_SPEC] = -1, + [_COPY] = 1, + [_BINARY_OP] = -1, + [_SWAP] = 0, + [_GUARD_IS_TRUE_POP] = -1, + [_GUARD_IS_FALSE_POP] = -1, + [_GUARD_IS_NONE_POP] = -1, + [_GUARD_IS_NOT_NONE_POP] = -1, + [_JUMP_TO_TOP] = 0, + [_SET_IP] = 0, + [_SAVE_RETURN_OFFSET] = 0, + [_EXIT_TRACE] = 0, + [_JUMP_ABSOLUTE] = 0, + [_JUMP_ABSOLUTE_HEADER] = 0, + [_CHECK_VALIDITY] = 0, + [_LOAD_CONST_INLINE] = 1, + [_LOAD_CONST_INLINE_BORROW] = 1, + [_LOAD_CONST_INLINE_WITH_NULL] = 2, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 2, + [_CHECK_GLOBALS] = 0, + [_CHECK_BUILTINS] = 0, + [_INTERNAL_INCREMENT_OPT_COUNTER] = -1, + [_SHRINK_STACK] = -1, +}; + +#endif // NEED_OPCODE_METADATA + #ifdef __cplusplus } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1b3db2c81b759c..6e22f07b69ae2d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4078,23 +4078,23 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } - op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { TIER_TWO_ONLY value = Py_NewRef(ptr); } - op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { TIER_TWO_ONLY value = ptr; } - op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = Py_NewRef(ptr); null = NULL; } - op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + pure 
op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = ptr; null = NULL; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index ba280fa102fae6..434ebd5152cc8e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -556,22 +556,6 @@ op_is_zappable(int opcode) } } -static inline bool -op_count_loads(int opcode) -{ - switch(opcode) { - case _LOAD_CONST_INLINE: - case _LOAD_CONST: - case _LOAD_FAST: - case _LOAD_CONST_INLINE_BORROW: - return 1; - case _LOAD_CONST_INLINE_WITH_NULL: - case _LOAD_CONST_INLINE_BORROW_WITH_NULL: - return 2; - default: - return 0; - } -} static inline int emit_const(uops_emitter *emitter, @@ -579,22 +563,24 @@ emit_const(uops_emitter *emitter, int num_pops) { _PyUOpInstruction shrink_stack = {_SHRINK_STACK, num_pops, 0, 0}; - // If all that precedes a _SHRINK_STACK is a bunch of loads, - // then we can safely eliminate that without side effects. - int load_count = 0; + // If all that precedes a _SHRINK_STACK is a bunch of pure instructions, + // then we can safely eliminate that without side effects + int net_stack_effect = -num_pops; _PyUOpInstruction *back = emitter->writebuffer + emitter->curr_i - 1; while (back >= emitter->writebuffer && - load_count < num_pops && op_is_zappable(back->opcode)) { - load_count += op_count_loads(back->opcode); + net_stack_effect += _PyUop_NetStackEffect[back->opcode]; back--; + if (net_stack_effect == 0) { + break; + } } - if (load_count == num_pops) { + if (net_stack_effect == 0) { back = emitter->writebuffer + emitter->curr_i - 1; - load_count = 0; + net_stack_effect = -num_pops; // Back up over the previous loads and zap them. 
- while(load_count < num_pops) { - load_count += op_count_loads(back->opcode); + while(net_stack_effect != 0) { + net_stack_effect += _PyUop_NetStackEffect[back->opcode]; if (back->opcode == _LOAD_CONST_INLINE || back->opcode == _LOAD_CONST_INLINE_WITH_NULL) { PyObject *old_const_val = (PyObject *)back->operand; diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index d4f3a096d2acc1..45944d20d67e3a 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -40,6 +40,16 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit("#endif // NEED_OPCODE_METADATA\n\n") +def generate_net_stack_effect(analysis: Analysis, out: CWriter) -> None: + out.emit("extern const int _PyUop_NetStackEffect[MAX_UOP_ID+1];\n") + out.emit("#ifdef NEED_OPCODE_METADATA\n") + out.emit("const int _PyUop_NetStackEffect[MAX_UOP_ID+1] = {\n") + for uop in analysis.uops.values(): + if uop.is_viable() and not uop.properties.tier_one_only: + out.emit(f"[{uop.name}] = {len(uop.stack.outputs) - len(uop.stack.inputs)},\n") + out.emit("};\n\n") + out.emit("#endif // NEED_OPCODE_METADATA\n\n") + def generate_uop_metadata( filenames: list[str], analysis: Analysis, outfile: TextIO ) -> None: @@ -49,6 +59,7 @@ def generate_uop_metadata( out.emit("#include \n") out.emit('#include "pycore_uop_ids.h"\n') generate_names_and_flags(analysis, out) + generate_net_stack_effect(analysis, out) arg_parser = argparse.ArgumentParser( From a8adada2a1c4aec78aa1d7763e3d27b47b4f1df6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 4 Feb 2024 21:50:28 +0800 Subject: [PATCH 110/111] fix stack metadata --- Include/internal/pycore_uop_metadata.h | 595 ++++++++++++------ Python/optimizer_analysis.c | 4 +- .../cases_generator/uop_metadata_generator.py | 16 +- 3 files changed, 412 insertions(+), 203 deletions(-) diff --git 
a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index eaf85465a26623..6aed6163ca19b9 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -414,205 +414,404 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { }; #endif // NEED_OPCODE_METADATA -extern const int _PyUop_NetStackEffect[MAX_UOP_ID+1]; +extern int _PyUop_NetStackEffect(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA -const int _PyUop_NetStackEffect[MAX_UOP_ID+1] = { - [_NOP] = 0, - [_RESUME_CHECK] = 0, - [_LOAD_FAST_CHECK] = 1, - [_LOAD_FAST] = 1, - [_LOAD_FAST_AND_CLEAR] = 1, - [_LOAD_FAST_LOAD_FAST] = 2, - [_LOAD_CONST] = 1, - [_STORE_FAST] = -1, - [_STORE_FAST_LOAD_FAST] = 0, - [_STORE_FAST_STORE_FAST] = -2, - [_POP_TOP] = -1, - [_PUSH_NULL] = 1, - [_END_SEND] = -1, - [_UNARY_NEGATIVE] = 0, - [_UNARY_NOT] = 0, - [_TO_BOOL] = 0, - [_TO_BOOL_BOOL] = 0, - [_TO_BOOL_INT] = 0, - [_TO_BOOL_LIST] = 0, - [_TO_BOOL_NONE] = 0, - [_TO_BOOL_STR] = 0, - [_TO_BOOL_ALWAYS_TRUE] = 0, - [_UNARY_INVERT] = 0, - [_GUARD_BOTH_INT] = 0, - [_BINARY_OP_MULTIPLY_INT] = -1, - [_BINARY_OP_ADD_INT] = -1, - [_BINARY_OP_SUBTRACT_INT] = -1, - [_GUARD_BOTH_FLOAT] = 0, - [_BINARY_OP_MULTIPLY_FLOAT] = -1, - [_BINARY_OP_ADD_FLOAT] = -1, - [_BINARY_OP_SUBTRACT_FLOAT] = -1, - [_GUARD_BOTH_UNICODE] = 0, - [_BINARY_OP_ADD_UNICODE] = -1, - [_BINARY_SUBSCR] = -1, - [_BINARY_SLICE] = -2, - [_STORE_SLICE] = -4, - [_BINARY_SUBSCR_LIST_INT] = -1, - [_BINARY_SUBSCR_STR_INT] = -1, - [_BINARY_SUBSCR_TUPLE_INT] = -1, - [_BINARY_SUBSCR_DICT] = -1, - [_LIST_APPEND] = -1, - [_SET_ADD] = -1, - [_STORE_SUBSCR] = -3, - [_STORE_SUBSCR_LIST_INT] = -3, - [_STORE_SUBSCR_DICT] = -3, - [_DELETE_SUBSCR] = -2, - [_CALL_INTRINSIC_1] = 0, - [_CALL_INTRINSIC_2] = -1, - [_POP_FRAME] = -1, - [_GET_AITER] = 0, - [_GET_ANEXT] = 1, - [_GET_AWAITABLE] = 0, - [_POP_EXCEPT] = -1, - [_LOAD_ASSERTION_ERROR] = 1, - [_LOAD_BUILD_CLASS] = 1, - [_STORE_NAME] = -1, - [_DELETE_NAME] = 0, - 
[_UNPACK_SEQUENCE] = 0, - [_UNPACK_SEQUENCE_TWO_TUPLE] = 0, - [_UNPACK_SEQUENCE_TUPLE] = 0, - [_UNPACK_SEQUENCE_LIST] = 0, - [_UNPACK_EX] = 2, - [_STORE_ATTR] = -2, - [_DELETE_ATTR] = -1, - [_STORE_GLOBAL] = -1, - [_DELETE_GLOBAL] = 0, - [_LOAD_LOCALS] = 1, - [_LOAD_FROM_DICT_OR_GLOBALS] = 0, - [_LOAD_NAME] = 1, - [_LOAD_GLOBAL] = 2, - [_GUARD_GLOBALS_VERSION] = 0, - [_GUARD_BUILTINS_VERSION] = 0, - [_LOAD_GLOBAL_MODULE] = 2, - [_LOAD_GLOBAL_BUILTINS] = 2, - [_DELETE_FAST] = 0, - [_MAKE_CELL] = 0, - [_DELETE_DEREF] = 0, - [_LOAD_FROM_DICT_OR_DEREF] = 0, - [_LOAD_DEREF] = 1, - [_STORE_DEREF] = -1, - [_COPY_FREE_VARS] = 0, - [_BUILD_STRING] = 0, - [_BUILD_TUPLE] = 0, - [_BUILD_LIST] = 0, - [_LIST_EXTEND] = -1, - [_SET_UPDATE] = -1, - [_BUILD_SET] = 0, - [_BUILD_MAP] = 0, - [_SETUP_ANNOTATIONS] = 0, - [_BUILD_CONST_KEY_MAP] = -1, - [_DICT_UPDATE] = -1, - [_DICT_MERGE] = -1, - [_MAP_ADD] = -2, - [_LOAD_SUPER_ATTR_ATTR] = -1, - [_LOAD_SUPER_ATTR_METHOD] = -1, - [_LOAD_ATTR] = 1, - [_GUARD_TYPE_VERSION] = 0, - [_CHECK_MANAGED_OBJECT_HAS_VALUES] = 0, - [_LOAD_ATTR_INSTANCE_VALUE] = 1, - [_CHECK_ATTR_MODULE] = 0, - [_LOAD_ATTR_MODULE] = 1, - [_CHECK_ATTR_WITH_HINT] = 0, - [_LOAD_ATTR_WITH_HINT] = 1, - [_LOAD_ATTR_SLOT] = 1, - [_CHECK_ATTR_CLASS] = 0, - [_LOAD_ATTR_CLASS] = 1, - [_GUARD_DORV_VALUES] = 0, - [_STORE_ATTR_INSTANCE_VALUE] = -2, - [_STORE_ATTR_SLOT] = -2, - [_COMPARE_OP] = -1, - [_COMPARE_OP_FLOAT] = -1, - [_COMPARE_OP_INT] = -1, - [_COMPARE_OP_STR] = -1, - [_IS_OP] = -1, - [_CONTAINS_OP] = -1, - [_CHECK_EG_MATCH] = 0, - [_CHECK_EXC_MATCH] = 0, - [_IS_NONE] = 0, - [_GET_LEN] = 1, - [_MATCH_CLASS] = -2, - [_MATCH_MAPPING] = 1, - [_MATCH_SEQUENCE] = 1, - [_MATCH_KEYS] = 1, - [_GET_ITER] = 0, - [_GET_YIELD_FROM_ITER] = 0, - [_FOR_ITER_TIER_TWO] = 1, - [_ITER_CHECK_LIST] = 0, - [_GUARD_NOT_EXHAUSTED_LIST] = 0, - [_ITER_NEXT_LIST] = 1, - [_ITER_CHECK_TUPLE] = 0, - [_GUARD_NOT_EXHAUSTED_TUPLE] = 0, - [_ITER_NEXT_TUPLE] = 1, - [_ITER_CHECK_RANGE] = 0, - 
[_GUARD_NOT_EXHAUSTED_RANGE] = 0, - [_ITER_NEXT_RANGE] = 1, - [_BEFORE_ASYNC_WITH] = 1, - [_BEFORE_WITH] = 1, - [_WITH_EXCEPT_START] = 1, - [_PUSH_EXC_INFO] = 1, - [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = 0, - [_GUARD_KEYS_VERSION] = 0, - [_LOAD_ATTR_METHOD_WITH_VALUES] = 1, - [_LOAD_ATTR_METHOD_NO_DICT] = 1, - [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = 1, - [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = 1, - [_CHECK_ATTR_METHOD_LAZY_DICT] = 0, - [_LOAD_ATTR_METHOD_LAZY_DICT] = 1, - [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = 0, - [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = 0, - [_CHECK_PEP_523] = 0, - [_CHECK_FUNCTION_EXACT_ARGS] = 0, - [_CHECK_STACK_SPACE] = 0, - [_INIT_CALL_PY_EXACT_ARGS] = -2, - [_PUSH_FRAME] = 0, - [_CALL_TYPE_1] = -2, - [_CALL_STR_1] = -2, - [_CALL_TUPLE_1] = -2, - [_EXIT_INIT_CHECK] = -1, - [_CALL_BUILTIN_CLASS] = -2, - [_CALL_BUILTIN_O] = -2, - [_CALL_BUILTIN_FAST] = -2, - [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = -2, - [_CALL_LEN] = -2, - [_CALL_ISINSTANCE] = -2, - [_CALL_METHOD_DESCRIPTOR_O] = -2, - [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = -2, - [_CALL_METHOD_DESCRIPTOR_NOARGS] = -2, - [_CALL_METHOD_DESCRIPTOR_FAST] = -2, - [_MAKE_FUNCTION] = 0, - [_SET_FUNCTION_ATTRIBUTE] = -1, - [_BUILD_SLICE] = -2, - [_CONVERT_VALUE] = 0, - [_FORMAT_SIMPLE] = 0, - [_FORMAT_WITH_SPEC] = -1, - [_COPY] = 1, - [_BINARY_OP] = -1, - [_SWAP] = 0, - [_GUARD_IS_TRUE_POP] = -1, - [_GUARD_IS_FALSE_POP] = -1, - [_GUARD_IS_NONE_POP] = -1, - [_GUARD_IS_NOT_NONE_POP] = -1, - [_JUMP_TO_TOP] = 0, - [_SET_IP] = 0, - [_SAVE_RETURN_OFFSET] = 0, - [_EXIT_TRACE] = 0, - [_JUMP_ABSOLUTE] = 0, - [_JUMP_ABSOLUTE_HEADER] = 0, - [_CHECK_VALIDITY] = 0, - [_LOAD_CONST_INLINE] = 1, - [_LOAD_CONST_INLINE_BORROW] = 1, - [_LOAD_CONST_INLINE_WITH_NULL] = 2, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 2, - [_CHECK_GLOBALS] = 0, - [_CHECK_BUILTINS] = 0, - [_INTERNAL_INCREMENT_OPT_COUNTER] = -1, - [_SHRINK_STACK] = -1, +int _PyUop_NetStackEffect(int opcode, int oparg) { + switch (opcode) { + case _NOP: + 
return (0); + case _RESUME_CHECK: + return (0); + case _LOAD_FAST_CHECK: + return (1); + case _LOAD_FAST: + return (1); + case _LOAD_FAST_AND_CLEAR: + return (1); + case _LOAD_FAST_LOAD_FAST: + return (2); + case _LOAD_CONST: + return (1); + case _STORE_FAST: + return (-1); + case _STORE_FAST_LOAD_FAST: + return (0); + case _STORE_FAST_STORE_FAST: + return (-2); + case _POP_TOP: + return (-1); + case _PUSH_NULL: + return (1); + case _END_SEND: + return (-1); + case _UNARY_NEGATIVE: + return (0); + case _UNARY_NOT: + return (0); + case _TO_BOOL: + return (0); + case _TO_BOOL_BOOL: + return (0); + case _TO_BOOL_INT: + return (0); + case _TO_BOOL_LIST: + return (0); + case _TO_BOOL_NONE: + return (0); + case _TO_BOOL_STR: + return (0); + case _TO_BOOL_ALWAYS_TRUE: + return (0); + case _UNARY_INVERT: + return (0); + case _GUARD_BOTH_INT: + return (0); + case _BINARY_OP_MULTIPLY_INT: + return (-1); + case _BINARY_OP_ADD_INT: + return (-1); + case _BINARY_OP_SUBTRACT_INT: + return (-1); + case _GUARD_BOTH_FLOAT: + return (0); + case _BINARY_OP_MULTIPLY_FLOAT: + return (-1); + case _BINARY_OP_ADD_FLOAT: + return (-1); + case _BINARY_OP_SUBTRACT_FLOAT: + return (-1); + case _GUARD_BOTH_UNICODE: + return (0); + case _BINARY_OP_ADD_UNICODE: + return (-1); + case _BINARY_SUBSCR: + return (-1); + case _BINARY_SLICE: + return (-2); + case _STORE_SLICE: + return (-4); + case _BINARY_SUBSCR_LIST_INT: + return (-1); + case _BINARY_SUBSCR_STR_INT: + return (-1); + case _BINARY_SUBSCR_TUPLE_INT: + return (-1); + case _BINARY_SUBSCR_DICT: + return (-1); + case _LIST_APPEND: + return (-1); + case _SET_ADD: + return (-1); + case _STORE_SUBSCR: + return (-3); + case _STORE_SUBSCR_LIST_INT: + return (-3); + case _STORE_SUBSCR_DICT: + return (-3); + case _DELETE_SUBSCR: + return (-2); + case _CALL_INTRINSIC_1: + return (0); + case _CALL_INTRINSIC_2: + return (-1); + case _POP_FRAME: + return (-1); + case _GET_AITER: + return (0); + case _GET_ANEXT: + return (1); + case _GET_AWAITABLE: + 
return (0); + case _POP_EXCEPT: + return (-1); + case _LOAD_ASSERTION_ERROR: + return (1); + case _LOAD_BUILD_CLASS: + return (1); + case _STORE_NAME: + return (-1); + case _DELETE_NAME: + return (0); + case _UNPACK_SEQUENCE: + return (-1 + oparg); + case _UNPACK_SEQUENCE_TWO_TUPLE: + return (-1 + oparg); + case _UNPACK_SEQUENCE_TUPLE: + return (-1 + oparg); + case _UNPACK_SEQUENCE_LIST: + return (-1 + oparg); + case _UNPACK_EX: + return ((oparg >> 8) + (oparg & 0xFF)); + case _STORE_ATTR: + return (-2); + case _DELETE_ATTR: + return (-1); + case _STORE_GLOBAL: + return (-1); + case _DELETE_GLOBAL: + return (0); + case _LOAD_LOCALS: + return (1); + case _LOAD_FROM_DICT_OR_GLOBALS: + return (0); + case _LOAD_NAME: + return (1); + case _LOAD_GLOBAL: + return (1 + (oparg & 1)); + case _GUARD_GLOBALS_VERSION: + return (0); + case _GUARD_BUILTINS_VERSION: + return (0); + case _LOAD_GLOBAL_MODULE: + return (1 + (oparg & 1)); + case _LOAD_GLOBAL_BUILTINS: + return (1 + (oparg & 1)); + case _DELETE_FAST: + return (0); + case _MAKE_CELL: + return (0); + case _DELETE_DEREF: + return (0); + case _LOAD_FROM_DICT_OR_DEREF: + return (0); + case _LOAD_DEREF: + return (1); + case _STORE_DEREF: + return (-1); + case _COPY_FREE_VARS: + return (0); + case _BUILD_STRING: + return (1 - oparg); + case _BUILD_TUPLE: + return (1 - oparg); + case _BUILD_LIST: + return (1 - oparg); + case _LIST_EXTEND: + return (-1); + case _SET_UPDATE: + return (-1); + case _BUILD_SET: + return (1 - oparg); + case _BUILD_MAP: + return (1 - oparg*2); + case _SETUP_ANNOTATIONS: + return (0); + case _BUILD_CONST_KEY_MAP: + return (-oparg); + case _DICT_UPDATE: + return (-1); + case _DICT_MERGE: + return (-1); + case _MAP_ADD: + return (-2); + case _LOAD_SUPER_ATTR_ATTR: + return (-2 + ((0) ? 
1 : 0)); + case _LOAD_SUPER_ATTR_METHOD: + return (-1); + case _LOAD_ATTR: + return ((oparg & 1)); + case _GUARD_TYPE_VERSION: + return (0); + case _CHECK_MANAGED_OBJECT_HAS_VALUES: + return (0); + case _LOAD_ATTR_INSTANCE_VALUE: + return ((oparg & 1)); + case _CHECK_ATTR_MODULE: + return (0); + case _LOAD_ATTR_MODULE: + return ((oparg & 1)); + case _CHECK_ATTR_WITH_HINT: + return (0); + case _LOAD_ATTR_WITH_HINT: + return ((oparg & 1)); + case _LOAD_ATTR_SLOT: + return ((oparg & 1)); + case _CHECK_ATTR_CLASS: + return (0); + case _LOAD_ATTR_CLASS: + return ((oparg & 1)); + case _GUARD_DORV_VALUES: + return (0); + case _STORE_ATTR_INSTANCE_VALUE: + return (-2); + case _STORE_ATTR_SLOT: + return (-2); + case _COMPARE_OP: + return (-1); + case _COMPARE_OP_FLOAT: + return (-1); + case _COMPARE_OP_INT: + return (-1); + case _COMPARE_OP_STR: + return (-1); + case _IS_OP: + return (-1); + case _CONTAINS_OP: + return (-1); + case _CHECK_EG_MATCH: + return (0); + case _CHECK_EXC_MATCH: + return (0); + case _IS_NONE: + return (0); + case _GET_LEN: + return (1); + case _MATCH_CLASS: + return (-2); + case _MATCH_MAPPING: + return (1); + case _MATCH_SEQUENCE: + return (1); + case _MATCH_KEYS: + return (1); + case _GET_ITER: + return (0); + case _GET_YIELD_FROM_ITER: + return (0); + case _FOR_ITER_TIER_TWO: + return (1); + case _ITER_CHECK_LIST: + return (0); + case _GUARD_NOT_EXHAUSTED_LIST: + return (0); + case _ITER_NEXT_LIST: + return (1); + case _ITER_CHECK_TUPLE: + return (0); + case _GUARD_NOT_EXHAUSTED_TUPLE: + return (0); + case _ITER_NEXT_TUPLE: + return (1); + case _ITER_CHECK_RANGE: + return (0); + case _GUARD_NOT_EXHAUSTED_RANGE: + return (0); + case _ITER_NEXT_RANGE: + return (1); + case _BEFORE_ASYNC_WITH: + return (1); + case _BEFORE_WITH: + return (1); + case _WITH_EXCEPT_START: + return (1); + case _PUSH_EXC_INFO: + return (1); + case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: + return (0); + case _GUARD_KEYS_VERSION: + return (0); + case 
_LOAD_ATTR_METHOD_WITH_VALUES: + return (((1) ? 1 : 0)); + case _LOAD_ATTR_METHOD_NO_DICT: + return (((1) ? 1 : 0)); + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return (((0) ? 1 : 0)); + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return (((0) ? 1 : 0)); + case _CHECK_ATTR_METHOD_LAZY_DICT: + return (0); + case _LOAD_ATTR_METHOD_LAZY_DICT: + return (((1) ? 1 : 0)); + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: + return (0); + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: + return (0); + case _CHECK_PEP_523: + return (0); + case _CHECK_FUNCTION_EXACT_ARGS: + return (0); + case _CHECK_STACK_SPACE: + return (0); + case _INIT_CALL_PY_EXACT_ARGS: + return (-1 - oparg); + case _PUSH_FRAME: + return (-1 + ((0) ? 1 : 0)); + case _CALL_TYPE_1: + return (-1 - oparg); + case _CALL_STR_1: + return (-1 - oparg); + case _CALL_TUPLE_1: + return (-1 - oparg); + case _EXIT_INIT_CHECK: + return (-1); + case _CALL_BUILTIN_CLASS: + return (-1 - oparg); + case _CALL_BUILTIN_O: + return (-1 - oparg); + case _CALL_BUILTIN_FAST: + return (-1 - oparg); + case _CALL_BUILTIN_FAST_WITH_KEYWORDS: + return (-1 - oparg); + case _CALL_LEN: + return (-1 - oparg); + case _CALL_ISINSTANCE: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_O: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_NOARGS: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_FAST: + return (-1 - oparg); + case _MAKE_FUNCTION: + return (0); + case _SET_FUNCTION_ATTRIBUTE: + return (-1); + case _BUILD_SLICE: + return (-1 - ((oparg == 3) ? 
1 : 0)); + case _CONVERT_VALUE: + return (0); + case _FORMAT_SIMPLE: + return (0); + case _FORMAT_WITH_SPEC: + return (-1); + case _COPY: + return (1); + case _BINARY_OP: + return (-1); + case _SWAP: + return (0); + case _GUARD_IS_TRUE_POP: + return (-1); + case _GUARD_IS_FALSE_POP: + return (-1); + case _GUARD_IS_NONE_POP: + return (-1); + case _GUARD_IS_NOT_NONE_POP: + return (-1); + case _JUMP_TO_TOP: + return (0); + case _SET_IP: + return (0); + case _SAVE_RETURN_OFFSET: + return (0); + case _EXIT_TRACE: + return (0); + case _JUMP_ABSOLUTE: + return (0); + case _JUMP_ABSOLUTE_HEADER: + return (0); + case _CHECK_VALIDITY: + return (0); + case _LOAD_CONST_INLINE: + return (1); + case _LOAD_CONST_INLINE_BORROW: + return (1); + case _LOAD_CONST_INLINE_WITH_NULL: + return (2); + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: + return (2); + case _CHECK_GLOBALS: + return (0); + case _CHECK_BUILTINS: + return (0); + case _INTERNAL_INCREMENT_OPT_COUNTER: + return (-1); + case _SHRINK_STACK: + return (-oparg); + default: Py_UNREACHABLE(); + }; }; #endif // NEED_OPCODE_METADATA diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 434ebd5152cc8e..e4c783b71f65d3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -569,7 +569,7 @@ emit_const(uops_emitter *emitter, _PyUOpInstruction *back = emitter->writebuffer + emitter->curr_i - 1; while (back >= emitter->writebuffer && op_is_zappable(back->opcode)) { - net_stack_effect += _PyUop_NetStackEffect[back->opcode]; + net_stack_effect += _PyUop_NetStackEffect(back->opcode, back->oparg); back--; if (net_stack_effect == 0) { break; @@ -580,7 +580,7 @@ emit_const(uops_emitter *emitter, net_stack_effect = -num_pops; // Back up over the previous loads and zap them. 
while(net_stack_effect != 0) { - net_stack_effect += _PyUop_NetStackEffect[back->opcode]; + net_stack_effect += _PyUop_NetStackEffect(back->opcode, back->oparg); if (back->opcode == _LOAD_CONST_INLINE || back->opcode == _LOAD_CONST_INLINE_WITH_NULL) { PyObject *old_const_val = (PyObject *)back->operand; diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 45944d20d67e3a..5d5f70076f9d99 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -15,6 +15,7 @@ write_header, cflags, ) +from stack import Stack from cwriter import CWriter from typing import TextIO @@ -41,12 +42,21 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: def generate_net_stack_effect(analysis: Analysis, out: CWriter) -> None: - out.emit("extern const int _PyUop_NetStackEffect[MAX_UOP_ID+1];\n") + out.emit("extern int _PyUop_NetStackEffect(int opcode, int oparg);\n") out.emit("#ifdef NEED_OPCODE_METADATA\n") - out.emit("const int _PyUop_NetStackEffect[MAX_UOP_ID+1] = {\n") + out.emit("int _PyUop_NetStackEffect(int opcode, int oparg) {\n") + out.emit("switch (opcode) {\n") for uop in analysis.uops.values(): if uop.is_viable() and not uop.properties.tier_one_only: - out.emit(f"[{uop.name}] = {len(uop.stack.outputs) - len(uop.stack.inputs)},\n") + out.emit(f"case {uop.name}:\n") + stack = Stack() + for inputs in uop.stack.inputs: + stack.pop(inputs) + for outputs in uop.stack.outputs: + stack.push(outputs) + out.emit(f"return ({stack.top_offset.to_c()});\n") + out.emit("default: Py_UNREACHABLE();\n") + out.emit("};\n") out.emit("};\n\n") out.emit("#endif // NEED_OPCODE_METADATA\n\n") From ab60387375a2252d71bdec12bd4643b185a07356 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 6 Feb 2024 12:53:17 +0800 Subject: [PATCH 111/111] address reviews --- Lib/test/test_capi/test_opt.py | 4 +- 
Python/optimizer_analysis.c | 92 ++++++++++++++++++++-------------- 2 files changed, 55 insertions(+), 41 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 490ad236d581b8..3b0c2532d72528 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -609,7 +609,7 @@ def testfunc(loops): binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] self.assertGreaterEqual(len(binop_count), 3) - self.assertEqual(len(guard_both_int_count), 1) + self.assertLessEqual(len(guard_both_int_count), 1) def test_int_impure_region(self): def testfunc(loops): @@ -869,7 +869,7 @@ def testfunc(loops): uops = {opname for opname, _, _ in ex} self.assertNotIn("_SHRINK_STACK", uops) iter_next_count = [opname for opname, _, _ in ex if opname == "_ITER_NEXT_RANGE"] - self.assertGreaterEqual(len(iter_next_count), 2) + self.assertLessEqual(len(iter_next_count), 2) def test_call_py_exact_args_disappearing(self): def dummy(x): diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index e4c783b71f65d3..774d956df0f516 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -152,7 +152,7 @@ typedef struct _Py_UOpsAbstractInterpContext { uops_emitter emitter; - _Py_UOpsSymType **water_level; + _Py_UOpsSymType **n_consumed; _Py_UOpsSymType **limit; _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; } _Py_UOpsAbstractInterpContext; @@ -166,7 +166,7 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) Py_ssize_t co_const_len = PyTuple_GET_SIZE(co_consts); _Py_UOpsSymType **sym_consts = ctx->limit - co_const_len; ctx->limit -= co_const_len; - if (ctx->limit <= ctx->water_level) { + if (ctx->limit <= ctx->n_consumed) { return NULL; } for (Py_ssize_t i = 0; i < co_const_len; i++) { @@ -182,12 +182,13 @@ create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, 
PyObject *co_consts) static inline _Py_UOpsSymType* sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx); -// 0 on success, anything else is error. +// 0 on success, -1 on error. static int ctx_frame_push( _Py_UOpsAbstractInterpContext *ctx, PyCodeObject *co, _Py_UOpsSymType **localsplus_start, + int n_locals_already_filled, int curr_stackentries ) { @@ -207,14 +208,14 @@ ctx_frame_push( frame->locals = localsplus_start; frame->stack = frame->locals + co->co_nlocalsplus; frame->stack_pointer = frame->stack + curr_stackentries; - ctx->water_level = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); - if (ctx->water_level >= ctx->limit) { + ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); + if (ctx->n_consumed >= ctx->limit) { return -1; } // Initialize with the initial state of all local variables - for (int i = 0; i < co->co_nlocalsplus; i++) { + for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) { _Py_UOpsSymType *local = sym_init_unknown(ctx); if (local == NULL) { return -1; @@ -237,52 +238,52 @@ ctx_frame_push( } static void -abstractinterp_fini(_Py_UOpsAbstractInterpContext *self) +abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx) { - if (self == NULL) { + if (ctx == NULL) { return; } - self->curr_frame_depth = 0; - int tys = self->t_arena.ty_curr_number; + ctx->curr_frame_depth = 0; + int tys = ctx->t_arena.ty_curr_number; for (int i = 0; i < tys; i++) { - Py_CLEAR(self->t_arena.arena[i].const_val); + Py_CLEAR(ctx->t_arena.arena[i].const_val); } } static int -abstractinterp_init( - _Py_UOpsAbstractInterpContext *self, +abstractcontext_init( + _Py_UOpsAbstractInterpContext *ctx, PyCodeObject *co, int curr_stacklen, int ir_entries, _PyUOpInstruction *new_writebuffer ) { - self->limit = self->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; - self->water_level = self->locals_and_stack; + ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; + ctx->n_consumed = ctx->locals_and_stack; #ifdef Py_DEBUG 
// Aids debugging a little. There should never be NULL in the abstract interpreter. for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { - self->locals_and_stack[i] = NULL; + ctx->locals_and_stack[i] = NULL; } #endif // Setup the arena for sym expressions. - self->t_arena.ty_curr_number = 0; - self->t_arena.ty_max_number = TY_ARENA_SIZE; + ctx->t_arena.ty_curr_number = 0; + ctx->t_arena.ty_max_number = TY_ARENA_SIZE; // Frame setup - self->curr_frame_depth = 0; - if (ctx_frame_push(self, co, self->water_level, curr_stacklen) < 0) { + ctx->curr_frame_depth = 0; + if (ctx_frame_push(ctx, co, ctx->n_consumed, 0, curr_stacklen) < 0) { return -1; } // IR and sym setup - self->frequent_syms.push_nulL_sym = NULL; + ctx->frequent_syms.push_nulL_sym = NULL; // Emitter setup - self->emitter.writebuffer = new_writebuffer; - self->emitter.curr_i = 0; - self->emitter.writebuffer_end = new_writebuffer + ir_entries; + ctx->emitter.writebuffer = new_writebuffer; + ctx->emitter.curr_i = 0; + ctx->emitter.writebuffer_end = new_writebuffer + ir_entries; return 0; } @@ -318,7 +319,7 @@ ctx_frame_pop( { _Py_UOpsAbstractFrame *frame = ctx->frame; - ctx->water_level = frame->locals; + ctx->n_consumed = frame->locals; ctx->curr_frame_depth--; assert(ctx->curr_frame_depth >= 1); ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; @@ -346,9 +347,7 @@ _Py_UOpsSymType_New(_Py_UOpsAbstractInterpContext *ctx, self->types = 0; if (const_val != NULL) { - Py_INCREF(const_val); - sym_set_type_from_const(self, const_val); - self->const_val = const_val; + self->const_val = Py_NewRef(const_val); } return self; @@ -364,6 +363,16 @@ sym_set_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinem } } +// We need to clear the type information on every escaping/impure instruction. +// Consider the following code +/* +foo.attr +bar() # opaque call +foo.attr +*/ +// We can't propagate the type information of foo.attr over across bar +// (at least, not without re-installing guards). 
`bar()` may call random code +// that invalidates foo's type version tag. static void sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym) { @@ -422,6 +431,7 @@ sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val) if (temp == NULL) { return NULL; } + sym_set_type_from_const(temp, const_val); sym_set_type(temp, TRUE_CONST, 0); return temp; } @@ -552,7 +562,7 @@ op_is_zappable(int opcode) case _CHECK_VALIDITY: return true; default: - return _PyUop_Flags[opcode] & HAS_PURE_FLAG; + return (_PyUop_Flags[opcode] & HAS_PURE_FLAG) && !((_PyUop_Flags[opcode] & HAS_DEOPT_FLAG)); } } @@ -605,6 +615,7 @@ emit_const(uops_emitter *emitter, if (emit_i(emitter, load_const) < 0) { return -1; } + return 0; } @@ -826,16 +837,19 @@ uop_abstract_interpret_single_inst( // This is _PUSH_FRAME's stack effect STACK_SHRINK(1); ctx->frame->stack_pointer = stack_pointer; - if (ctx_frame_push(ctx, co, ctx->water_level, 0) != 0){ + _Py_UOpsSymType **localsplus_start = ctx->n_consumed; + int n_locals_already_filled = 0; + // Can determine statically, so we interleave the new locals + // and make the current stack the new locals. + // This also sets up for true call inlining. + if (!sym_is_type(self_or_null, SELF_OR_NULL)) { + localsplus_start = args; + n_locals_already_filled = argcount; + } + if (ctx_frame_push(ctx, co, localsplus_start, n_locals_already_filled, 0) != 0){ goto error; } stack_pointer = ctx->frame->stack_pointer; - // Cannot determine statically, so we can't propagate types. 
- if (!sym_is_type(self_or_null, SELF_OR_NULL)) { - for (int i = 0; i < argcount; i++) { - ctx->frame->locals[i] = args[i]; - } - } break; } @@ -1109,7 +1123,7 @@ uop_abstract_interpret( _Py_UOpsAbstractInterpContext ctx; - if (abstractinterp_init( + if (abstractcontext_init( &ctx, co, curr_stacklen, trace_len, new_trace) < 0) { @@ -1196,12 +1210,12 @@ uop_abstract_interpret( res = ctx.emitter.curr_i; - abstractinterp_fini(&ctx); + abstractcontext_fini(&ctx); return res; error: - abstractinterp_fini(&ctx); + abstractcontext_fini(&ctx); return -1; }