gh-126835: Move constant unaryop & binop folding to CFG by WolframAlph · Pull Request #129550 · python/cpython · GitHub
gh-126835: Move constant unaryop & binop folding to CFG #129550
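This change moves constant folding of unary and binary operations out of the AST optimizer (Python/ast_opt.c) and into the CFG optimization pass, so the observable bytecode should stay the same. A minimal sketch of that observable behaviour, assuming a build with this change applied (exact dis output varies between CPython versions):

import dis

# Constant unary and binary operations are still expected to collapse into a
# single folded constant in the final code object.
code = compile("-(1 + 2) * 3", "<demo>", "eval")
print(code.co_consts)  # expected: (-9,)
dis.dis(code)          # expected: the folded constant is loaded directly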


Merged
merged 35 commits into from Feb 21, 2025
Changes from 1 commit
Commits (35)
04b4a84
move binop folding to cfg
WolframAlph Feb 14, 2025
a6babab
update tests
WolframAlph Feb 14, 2025
0fa7c4c
add match case folding tests
WolframAlph Feb 14, 2025
27b1a09
add peepholer tests
WolframAlph Feb 15, 2025
d3df8c8
polish ast code
WolframAlph Feb 15, 2025
e82841a
add assertions
WolframAlph Feb 15, 2025
cd6fbd7
use IS_NUMERIC_CONST_EXPR
WolframAlph Feb 15, 2025
300976e
bring back tests
WolframAlph Feb 15, 2025
3240d8e
move unaryop to cfg
WolframAlph Feb 15, 2025
74d2275
add tests
WolframAlph Feb 15, 2025
8358e28
cancel out unary not
WolframAlph Feb 15, 2025
c1a1be5
fix optimize_if_const_unaryop
WolframAlph Feb 15, 2025
ef9221a
add test_optimize_unary_not
WolframAlph Feb 15, 2025
598adce
add optimize_unary_not_non_const
WolframAlph Feb 16, 2025
bb60d4c
add tests
WolframAlph Feb 16, 2025
eb99870
add tests
WolframAlph Feb 16, 2025
c93627e
add static
WolframAlph Feb 16, 2025
f0f044a
polish
WolframAlph Feb 16, 2025
a02ac66
add unaryop folding tests
WolframAlph Feb 16, 2025
2739fa0
add not not test to catch misoptimized case
WolframAlph Feb 17, 2025
258a5b6
revert old unarynot handing, add contains/is + unarynot folding
WolframAlph Feb 17, 2025
59ee897
address reviews
WolframAlph Feb 17, 2025
91ea2fa
address reviews
WolframAlph Feb 17, 2025
2c5ee86
simplify optimize_if_const_unaryop
WolframAlph Feb 17, 2025
0399fce
address reviews
WolframAlph Feb 19, 2025
3c53923
simplify instr_make_load_const
WolframAlph Feb 19, 2025
a6a06c8
address review for tests
WolframAlph Feb 20, 2025
2ea8425
replace macros
WolframAlph Feb 20, 2025
7c9c69b
update peepholer test
WolframAlph Feb 20, 2025
099afba
define is_unarynegative_const_complex_expr & is_allowed_match_case_bi…
WolframAlph Feb 20, 2025
0dad7c1
try to fold match case expression without checks
WolframAlph Feb 20, 2025
1e8b552
simplify folding
WolframAlph Feb 20, 2025
f4e9a42
address review
WolframAlph Feb 20, 2025
2dba23f
minor adjustments
WolframAlph Feb 21, 2025
cafbc61
Merge branch 'main' into fold-binop-cfg
iritkatriel Feb 21, 2025
move binop folding to cfg
WolframAlph committed Feb 14, 2025
commit 04b4a847f2c0b98f9fe477491e5695ff07ed3bb9
289 changes: 112 additions & 177 deletions Python/ast_opt.c
@@ -136,129 +136,6 @@ fold_unaryop(expr_ty node, PyArena *arena, _PyASTOptimizeState *state)
return make_const(node, newval, arena);
}

/* Check whether a collection doesn't contain too many items (including
subcollections). This protects against creating a constant that needs
too much time to hash.
"limit" is the maximal number of items.
Returns a negative number if the total number of items exceeds the
limit. Otherwise returns the limit minus the total number of items.
*/

static Py_ssize_t
check_complexity(PyObject *obj, Py_ssize_t limit)
{
if (PyTuple_Check(obj)) {
Py_ssize_t i;
limit -= PyTuple_GET_SIZE(obj);
for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
limit = check_complexity(PyTuple_GET_ITEM(obj, i), limit);
}
return limit;
}
return limit;
}

#define MAX_INT_SIZE 128 /* bits */
#define MAX_COLLECTION_SIZE 256 /* items */
#define MAX_STR_SIZE 4096 /* characters */
#define MAX_TOTAL_ITEMS 1024 /* including nested collections */

static PyObject *
safe_multiply(PyObject *v, PyObject *w)
{
if (PyLong_Check(v) && PyLong_Check(w) &&
!_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
) {
int64_t vbits = _PyLong_NumBits(v);
int64_t wbits = _PyLong_NumBits(w);
assert(vbits >= 0);
assert(wbits >= 0);
if (vbits + wbits > MAX_INT_SIZE) {
return NULL;
}
}
else if (PyLong_Check(v) && PyTuple_Check(w)) {
Py_ssize_t size = PyTuple_GET_SIZE(w);
if (size) {
long n = PyLong_AsLong(v);
if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
return NULL;
}
if (n && check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
return NULL;
}
}
}
else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
PyBytes_GET_SIZE(w);
if (size) {
long n = PyLong_AsLong(v);
if (n < 0 || n > MAX_STR_SIZE / size) {
return NULL;
}
}
}
else if (PyLong_Check(w) &&
(PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
{
return safe_multiply(w, v);
}

return PyNumber_Multiply(v, w);
}

static PyObject *
safe_power(PyObject *v, PyObject *w)
{
if (PyLong_Check(v) && PyLong_Check(w) &&
!_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
) {
int64_t vbits = _PyLong_NumBits(v);
size_t wbits = PyLong_AsSize_t(w);
assert(vbits >= 0);
if (wbits == (size_t)-1) {
return NULL;
}
if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
return NULL;
}
}

return PyNumber_Power(v, w, Py_None);
}

static PyObject *
safe_lshift(PyObject *v, PyObject *w)
{
if (PyLong_Check(v) && PyLong_Check(w) &&
!_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
) {
int64_t vbits = _PyLong_NumBits(v);
size_t wbits = PyLong_AsSize_t(w);
assert(vbits >= 0);
if (wbits == (size_t)-1) {
return NULL;
}
if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
return NULL;
}
}

return PyNumber_Lshift(v, w);
}

static PyObject *
safe_mod(PyObject *v, PyObject *w)
{
if (PyUnicode_Check(v) || PyBytes_Check(v)) {
return NULL;
}

return PyNumber_Remainder(v, w);
}
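The safe_multiply/safe_power/safe_lshift/safe_mod helpers removed here exist so that folding cannot spend unbounded time or memory building a constant (bounded by MAX_INT_SIZE, MAX_COLLECTION_SIZE, MAX_STR_SIZE and MAX_TOTAL_ITEMS); equivalent guards are expected to accompany the folding in its new location. A small sketch of the observable effect, assuming those limits are kept:

# Small constant results are expected to be folded into co_consts, while
# results exceeding the limits (here MAX_STR_SIZE) are left for run time.
for src in ("5 * 'ab'", "5000 * 'ab'"):
    code = compile(src, "<demo>", "eval")
    folded = any(isinstance(c, str) and len(c) > 2 for c in code.co_consts)
    print(f"{src}: folded at compile time -> {folded}")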


static expr_ty
parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena)
{
@@ -478,58 +355,7 @@ fold_binop(expr_ty node, PyArena *arena, _PyASTOptimizeState *state)
return optimize_format(node, lv, rhs->v.Tuple.elts, arena);
}

if (rhs->kind != Constant_kind) {
return 1;
}

PyObject *rv = rhs->v.Constant.value;
PyObject *newval = NULL;

switch (node->v.BinOp.op) {
case Add:
newval = PyNumber_Add(lv, rv);
break;
case Sub:
newval = PyNumber_Subtract(lv, rv);
break;
case Mult:
newval = safe_multiply(lv, rv);
break;
case Div:
newval = PyNumber_TrueDivide(lv, rv);
break;
case FloorDiv:
newval = PyNumber_FloorDivide(lv, rv);
break;
case Mod:
newval = safe_mod(lv, rv);
break;
case Pow:
newval = safe_power(lv, rv);
break;
case LShift:
newval = safe_lshift(lv, rv);
break;
case RShift:
newval = PyNumber_Rshift(lv, rv);
break;
case BitOr:
newval = PyNumber_Or(lv, rv);
break;
case BitXor:
newval = PyNumber_Xor(lv, rv);
break;
case BitAnd:
newval = PyNumber_And(lv, rv);
break;
// No builtin constants implement the following operators
case MatMult:
return 1;
// No default case, so the compiler will emit a warning if new binary
// operators are added without being handled here
}

return make_const(node, newval, arena);
return 1;
}
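After this change, fold_binop at the AST level only keeps the %-formatting rewrite and otherwise returns 1, leaving constant operands untouched; the actual folding is expected to be visible only in the emitted bytecode. A sketch, assuming Python 3.13+ where ast.parse() accepts an optimize argument:

import ast

# The optimized AST is expected to still contain the BinOp node...
print(ast.dump(ast.parse("1 + 2", mode="eval", optimize=1)))
# ...while the compiled code object still ends up with the folded constant,
# because the folding now happens on the CFG.
print(compile("1 + 2", "<demo>", "eval").co_consts)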

static PyObject*
@@ -971,6 +797,115 @@ astfold_withitem(withitem_ty node_, PyArena *ctx_, _PyASTOptimizeState *state)
return 1;
}

#define IS_CONST_EXPR(N) \
((N)->kind == Constant_kind)

#define CONST_EXPR_VALUE(N) \
((N)->v.Constant.value)

#define IS_COMPLEX_CONST_EXPR(N) \
(IS_CONST_EXPR(N) && PyComplex_CheckExact(CONST_EXPR_VALUE(N)))

#define IS_NUMERIC_CONST_EXPR(N) \
(IS_CONST_EXPR(N) && (PyLong_CheckExact(CONST_EXPR_VALUE(N)) || PyFloat_CheckExact(CONST_EXPR_VALUE(N))))

#define IS_UNARY_EXPR(N) \
((N)->kind == UnaryOp_kind)

#define UNARY_EXPR_OP(N) \
((N)->v.UnaryOp.op)

#define UNARY_EXPR_OPERAND(N) \
((N)->v.UnaryOp.operand)

#define UNARY_EXPR_OPERAND_CONST_VALUE(N) \
(CONST_EXPR_VALUE(UNARY_EXPR_OPERAND(N)))

#define IS_UNARY_SUB_EXPR(N) \
(IS_UNARY_EXPR(N) && UNARY_EXPR_OP(N) == USub)

#define IS_NUMERIC_UNARY_CONST_EXPR(N) \
(IS_UNARY_SUB_EXPR(N) && IS_NUMERIC_CONST_EXPR(UNARY_EXPR_OPERAND(N)))

#define IS_COMPLEX_UNARY_CONST_EXPR(N) \
(IS_UNARY_SUB_EXPR(N) && IS_COMPLEX_CONST_EXPR(UNARY_EXPR_OPERAND(N)))

#define BINARY_EXPR(N) \
((N)->v.BinOp)
Member:
I'm not sure macros like this are helpful - the name of the macro doesn't reflect what it is (it's too cryptic) so you need to look at the definition to understand the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it read better if we were to access the node's attributes directly everywhere?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case I think it would.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be quite tedious to write the assertions used, for example, in fold_const_unary_or_complex_expr or fold_const_binary_complex_expr, plus the checks, if we were to access all the node attributes every time. Maybe work on the naming instead? IMO macros like IS_MATCH_COMPLEX_BINARY_CONST_EXPR and IS_MATCH_NUMERIC_OR_COMPLEX_UNARY_CONST_EXPR nicely summarize the pattern of the subtree we are looking for. I would favor working on the naming to make it more expressive.

Copy link
1E80
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's how the rest of this file is written. There is a lot of node_->v.SomeType.some_field. Also in ast.c, codegen.c, ast_unparse.c. If we go with the macros then this part of this file is using a different style from everything else.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, let me try writing it that way and we'll see if it looks better.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. Note that my point is not that it would necessarily look better, but that it would be consistent with the style of the rest of the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. No problem.


#define BINARY_EXPR_OP(N) \
(BINARY_EXPR(N).op)

#define BINARY_EXPR_LEFT(N) \
(BINARY_EXPR(N).left)

#define BINARY_EXPR_RIGHT(N) \
(BINARY_EXPR(N).right)

#define IS_BINARY_EXPR(N) \
((N)->kind == BinOp_kind)

#define IS_BINARY_ADD_EXPR(N) \
(IS_BINARY_EXPR(N) && BINARY_EXPR_OP(N) == Add)

#define IS_BINARY_SUB_EXPR(N) \
(IS_BINARY_EXPR(N) && BINARY_EXPR_OP(N) == Sub)

#define IS_MATCH_NUMERIC_OR_COMPLEX_UNARY_CONST_EXPR(N) \
(IS_NUMERIC_UNARY_CONST_EXPR(N) || IS_COMPLEX_UNARY_CONST_EXPR(N))

#define IS_MATCH_COMPLEX_BINARY_CONST_EXPR(N) \
( \
(IS_BINARY_ADD_EXPR(N) || IS_BINARY_SUB_EXPR(N)) \
&& (IS_NUMERIC_UNARY_CONST_EXPR(BINARY_EXPR_LEFT(N)) || IS_CONST_EXPR(BINARY_EXPR_LEFT(N))) \
&& IS_COMPLEX_CONST_EXPR(BINARY_EXPR_RIGHT(N)) \
)
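These macros describe the AST shape of the literal forms that appear in match patterns, such as -1 and -1+2j. That shape can be inspected from Python (ast.dump output formatting differs slightly between versions):

import ast

# UnaryOp(USub, Constant) -- the shape IS_NUMERIC_UNARY_CONST_EXPR matches.
print(ast.dump(ast.parse("-1", mode="eval").body))
# BinOp(UnaryOp(USub, Constant), Add, Constant(complex)) -- the shape
# IS_MATCH_COMPLEX_BINARY_CONST_EXPR matches.
print(ast.dump(ast.parse("-1 + 2j", mode="eval").body))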


static int
fold_const_unary_or_complex_expr(expr_ty e, PyArena *arena)
{
assert(IS_MATCH_NUMERIC_OR_COMPLEX_UNARY_CONST_EXPR(e));
PyObject *constant = UNARY_EXPR_OPERAND_CONST_VALUE(e);
assert(UNARY_EXPR_OP(e) == USub);
PyObject* folded = PyNumber_Negative(constant);
return make_const(e, folded, arena);
}

static int
fold_const_binary_complex_expr(expr_ty e, PyArena *arena)
{
assert(IS_MATCH_COMPLEX_BINARY_CONST_EXPR(e));
expr_ty left_expr = BINARY_EXPR_LEFT(e);
if (IS_NUMERIC_UNARY_CONST_EXPR(left_expr)) {
if (!fold_const_unary_or_complex_expr(left_expr, arena)) {
return 0;
}
}
assert(IS_CONST_EXPR(BINARY_EXPR_LEFT(e)));
operator_ty op = BINARY_EXPR_OP(e);
PyObject *left = CONST_EXPR_VALUE(BINARY_EXPR_LEFT(e));
PyObject *right = CONST_EXPR_VALUE(BINARY_EXPR_RIGHT(e));
assert(op == Add || op == Sub);
PyObject *folded = op == Add ? PyNumber_Add(left, right) : PyNumber_Subtract(left, right);
return make_const(e, folded, arena);
}

static int
fold_pattern_match_value(expr_ty node, PyArena *arena, _PyASTOptimizeState *Py_UNUSED(state))
{
switch (node->kind)
{
case UnaryOp_kind:
return fold_const_unary_or_complex_expr(node, arena);
case BinOp_kind:
return fold_const_binary_complex_expr(node, arena);
default:
break;
}
return 1;
}
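fold_pattern_match_value only needs to handle the expression forms that are legal as literal patterns, i.e. possibly negated numbers and complex literals written as a real part plus or minus an imaginary part. A small runnable example of such patterns:

def classify(x):
    match x:
        case -1:
            return "negative literal pattern"
        case 1 + 2j:
            return "complex literal pattern"
        case _:
            return "no literal pattern matched"

print(classify(-1))      # negative literal pattern
print(classify(1 + 2j))  # complex literal pattern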

static int
astfold_pattern(pattern_ty node_, PyArena *ctx_, _PyASTOptimizeState *state)
{
@@ -980,15 +915,15 @@ astfold_pattern(pattern_ty node_, PyArena *ctx_, _PyASTOptimizeState *state)
ENTER_RECURSIVE(state);
switch (node_->kind) {
case MatchValue_kind:
CALL(astfold_expr, expr_ty, node_->v.MatchValue.value);
CALL(fold_pattern_match_value, expr_ty, node_->v.MatchValue.value);
break;
case MatchSingleton_kind:
break;
case MatchSequence_kind:
CALL_SEQ(astfold_pattern, pattern, node_->v.MatchSequence.patterns);
break;
case MatchMapping_kind:
CALL_SEQ(astfold_expr, expr, node_->v.MatchMapping.keys);
CALL_SEQ(fold_pattern_match_value, expr, node_->v.MatchMapping.keys);
CALL_SEQ(astfold_pattern, pattern, node_->v.MatchMapping.patterns);
break;
case MatchClass_kind: