-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] handle (X + A) % Op1 for small X, A #140369
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-llvm-transforms Author: None (DesWurstes) ChangesAllows LLVM to optimize rem out of loops like this: void c(void)
{
unsigned m = 0;
for(int i = 0; i < x; i++)
{
m += 7;
m %= 600;
g(m);
}
} Alive2 proof: https://alive2.llvm.org/ce/z/NXHJJD define noundef i16 @<!-- -->src(i16 noundef %x, i16 noundef %n, i16 noundef %a) unnamed_addr {
start:
%_3 = icmp ult i16 %x, %n
tail call void @<!-- -->llvm.assume(i1 %_3)
%_4 = icmp ult i16 %a, %n
tail call void @<!-- -->llvm.assume(i1 %_4)
%_6 = add nuw i16 %x, %a
%0 = urem i16 %_6, %n
ret i16 %0
}
define noundef i16 @<!-- -->tgt(i16 noundef %x, i16 noundef %n, i16 noundef %a) unnamed_addr {
start:
%_3 = icmp ult i16 %x, %n
tail call void @<!-- -->llvm.assume(i1 %_3)
%_4 = icmp ult i16 %a, %n
tail call void @<!-- -->llvm.assume(i1 %_4)
%_6 = add nuw i16 %x, %a
%w = icmp uge i16 %_6, %n
%_7 = sub nuw i16 %_6, %n
%0 = select i1 %w, i16 %_7, i16 %_6
ret i16 %0
}
declare void @<!-- -->llvm.assume(i1 noundef) I added the equivalents of tests for Full diff: https://github.com/llvm/llvm-project/pull/140369.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index c7023eb79b04e..2e0d25ab46c15 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -2461,6 +2461,24 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
}
}
+ // For "(X + A) % Op1" and if (X u< Op1 && A u< Op1)
+ // => (X + A) >= Op1 ? X + A - Op1 : X + A .
+ Value *A = nullptr;
+ if (match(Op0, m_Add(m_Value(X), m_Value(A)))) {
+ Value *Val_X =
+ simplifyICmpInst(ICmpInst::ICMP_ULT, X, Op1, SQ.getWithInstruction(&I));
+ Value *Val_A =
+ simplifyICmpInst(ICmpInst::ICMP_ULT, A, Op1, SQ.getWithInstruction(&I));
+ if (Val_X && match(Val_X, m_One()) && Val_A && match(Val_A, m_One())) {
+ Value *FrozenOp0 = Op0;
+ if (!isGuaranteedNotToBeUndef(Op0))
+ FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
+ Value *Cmp = Builder.CreateICmpUGE(FrozenOp0, Op1);
+ Value *Sub = Builder.CreateSub(FrozenOp0, Op1);
+ return SelectInst::Create(Cmp, Sub, FrozenOp0);
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll b/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll
index 02be67a2ca250..0f2567e6475da 100644
--- a/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll
@@ -19,6 +19,29 @@ define i8 @urem_assume(i8 %x, i8 %n) {
ret i8 %out
}
+; https://alive2.llvm.org/ce/z/NXHJJD
+define i8 @urem_assume_a(i8 %x, i8 %n, i8 %a) {
+; CHECK-LABEL: @urem_assume_a(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X_FR:%.*]], [[N:%.*]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[CMP_A:%.*]] = icmp ult i8 [[A:%.*]], [[N]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_A]])
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X_FR]], [[A]]
+; CHECK-NEXT: [[ADD_FROZEN:%.*]] = freeze i8 [[ADD]]
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i8 [[ADD_FROZEN]], [[N]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i8 0, i8 [[N]]
+; CHECK-NEXT: [[OUT:%.*]] = sub i8 [[ADD_FROZEN]], [[TMP1]]
+; CHECK-NEXT: ret i8 [[OUT]]
+;
+ %cmp = icmp ult i8 %x, %n
+ tail call void @llvm.assume(i1 %cmp)
+ %cmp_a = icmp ult i8 %a, %n
+ tail call void @llvm.assume(i1 %cmp_a)
+ %add = add nuw i8 %x, %a
+ %out = urem i8 %add, %n
+ ret i8 %out
+}
+
; https://alive2.llvm.org/ce/z/MGgtYN
define i8 @urem_assume_without_nuw(i8 %x, i8 %n) {
; CHECK-LABEL: @urem_assume_without_nuw(
@@ -53,6 +76,48 @@ define i8 @urem_assume_eq(i8 %x, i8 %n) {
ret i8 %out
}
+; Negative test: The assume is false
+define i8 @urem_assume_eq_x(i8 %x, i8 %n, i8 %a) {
+; CHECK-LABEL: @urem_assume_eq_x(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[CMP_A:%.*]] = icmp ult i8 [[A:%.*]], [[N]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_A]])
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1
+; CHECK-NEXT: [[OUT:%.*]] = urem i8 [[ADD]], [[N]]
+; CHECK-NEXT: ret i8 [[OUT]]
+;
+ %cmp = icmp eq i8 %x, %n
+ tail call void @llvm.assume(i1 %cmp)
+ %cmp_a = icmp ult i8 %a, %n
+ tail call void @llvm.assume(i1 %cmp_a)
+ %add = add i8 %x, 1
+ %out = urem i8 %add, %n
+ ret i8 %out
+}
+
+; Negative test: The assume is false
+define i8 @urem_assume_eq_a(i8 %x, i8 %n, i8 %a) {
+; CHECK-LABEL: @urem_assume_eq_a(
+; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X_FR]], [[N:%.*]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[CMP_A:%.*]] = icmp eq i8 [[A:%.*]], [[N]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_A]])
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X_FR]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[ADD]], [[N]]
+; CHECK-NEXT: [[OUT:%.*]] = select i1 [[TMP1]], i8 0, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[OUT]]
+;
+ %cmp = icmp ult i8 %x, %n
+ tail call void @llvm.assume(i1 %cmp)
+ %cmp_a = icmp eq i8 %a, %n
+ tail call void @llvm.assume(i1 %cmp_a)
+ %add = add i8 %x, 1
+ %out = urem i8 %add, %n
+ ret i8 %out
+}
+
; Negative test: The assume is false
define i8 @urem_assume_ne(i8 %x, i8 %n) {
; CHECK-LABEL: @urem_assume_ne(
@@ -71,6 +136,52 @@ start:
ret i8 %out
}
+; Negative test: The assume is false
+define i8 @urem_assume_ne_x(i8 %x, i8 %n, i8 %a) {
+; CHECK-LABEL: @urem_assume_ne_x(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[CMP_A:%.*]] = icmp ult i8 [[A:%.*]], [[N]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_A]])
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1
+; CHECK-NEXT: [[OUT:%.*]] = urem i8 [[ADD]], [[N]]
+; CHECK-NEXT: ret i8 [[OUT]]
+;
+start:
+ %cmp = icmp ne i8 %x, %n
+ tail call void @llvm.assume(i1 %cmp)
+ %cmp_a = icmp ult i8 %a, %n
+ tail call void @llvm.assume(i1 %cmp_a)
+ %add = add i8 %x, 1
+ %out = urem i8 %add, %n
+ ret i8 %out
+}
+
+; Negative test: The assume is false
+define i8 @urem_assume_ne_a(i8 %x, i8 %n, i8 %a) {
+; CHECK-LABEL: @urem_assume_ne_a(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X_FR]], [[N:%.*]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[CMP_A:%.*]] = icmp ne i8 [[A:%.*]], [[N]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_A]])
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X_FR]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i8 [[ADD]], [[N]]
+; CHECK-NEXT: [[OUT:%.*]] = select i1 [[TMP0]], i8 0, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[OUT]]
+;
+start:
+ %cmp = icmp ult i8 %x, %n
+ tail call void @llvm.assume(i1 %cmp)
+ %cmp_a = icmp ne i8 %a, %n
+ tail call void @llvm.assume(i1 %cmp_a)
+ %add = add i8 %x, 1
+ %out = urem i8 %add, %n
+ ret i8 %out
+}
+
; Negative test: The add constant is not 1
define i8 @urem_assume_with_unexpected_const(i8 %x, i8 %n) {
; CHECK-LABEL: @urem_assume_with_unexpected_const(
@@ -103,6 +214,39 @@ define i8 @urem_without_assume(i8 %arg, i8 %arg2) {
ret i8 %out
}
+; Negative test: The add constant is not less than %arg2
+define i8 @urem_without_assume_a_var(i8 %arg, i8 %arg2, i8 %a) {
+; CHECK-LABEL: @urem_without_assume_a_var(
+; CHECK-NEXT: [[X:%.*]] = urem i8 [[ARG:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[A:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = urem i8 [[ADD]], [[ARG2]]
+; CHECK-NEXT: ret i8 [[OUT]]
+;
+ %x = urem i8 %arg, %arg2
+ %add = add i8 %x, %a
+ %out = urem i8 %add, %arg2
+ ret i8 %out
+}
+
+; https://alive2.llvm.org/ce/z/tcdf4d
+define i8 @urem_without_assume_a(i8 %arg, i8 %arg2, i8 %a) {
+; CHECK-LABEL: @urem_without_assume_a(
+; CHECK-NEXT: [[X_REM:%.*]] = urem i8 [[ARG:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT: [[A_REM:%.*]] = urem i8 [[A:%.*]], [[ARG2]]
+; CHECK-NEXT: [[ADDD:%.*]] = add i8 [[X_REM]], [[A_REM]]
+; CHECK-NEXT: [[ADD_FROZEN:%.*]] = freeze i8 [[ADDD]]
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ult i8 [[ADD_FROZEN]], [[ARG2]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i8 0, i8 [[ARG2]]
+; CHECK-NEXT: [[OUT:%.*]] = sub i8 [[ADD_FROZEN]], [[TMP1]]
+; CHECK-NEXT: ret i8 [[OUT]]
+;
+ %x_rem = urem i8 %arg, %arg2
+ %a_rem = urem i8 %a, %arg2
+ %add = add i8 %x_rem, %a_rem
+ %out = urem i8 %add, %arg2
+ ret i8 %out
+}
+
; https://alive2.llvm.org/ce/z/eHkgRa
define i8 @urem_with_dominating_condition(i8 %x, i8 %n) {
; CHECK-LABEL: @urem_with_dominating_condition(
@@ -177,4 +321,60 @@ define noundef i8 @urem_with_opposite_condition(i8 %x, i8 %n) {
ret i8 0
}
+; Negative test
+define noundef i8 @urem_with_opposite_condition_x(i8 %x, i8 %n, i8 %a) {
+; CHECK-LABEL: @urem_with_opposite_condition_x(
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[DOTBB2:%.*]], label [[DOTBB0:%.*]]
+; CHECK: .bb0:
+; CHECK-NEXT: [[COND2:%.*]] = icmp ult i8 [[A:%.*]], [[N]]
+; CHECK-NEXT: br i1 [[COND2]], label [[DOTBB1:%.*]], label [[DOTBB2]]
+; CHECK: .bb1:
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[A]]
+; CHECK-NEXT: [[OUT:%.*]] = urem i8 [[ADD]], [[N]]
+; CHECK-NEXT: ret i8 [[OUT]]
+; CHECK: .bb2:
+; CHECK-NEXT: ret i8 0
+;
+ %cond = icmp ult i8 %x, %n
+ br i1 %cond, label %.bb2, label %.bb0 ; Revert the condition
+.bb0:
+ %cond2 = icmp ult i8 %a, %n
+ br i1 %cond2, label %.bb1, label %.bb2
+.bb1:
+ %add = add i8 %x, %a
+ %out = urem i8 %add, %n
+ ret i8 %out
+.bb2:
+ ret i8 0
+}
+
+; Negative test
+define noundef i8 @urem_with_opposite_condition_a(i8 %x, i8 %n, i8 %a) {
+; CHECK-LABEL: @urem_with_opposite_condition_a(
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[DOTBB0:%.*]], label [[DOTBB2:%.*]]
+; CHECK: .bb0:
+; CHECK-NEXT: [[COND2:%.*]] = icmp ult i8 [[A:%.*]], [[N]]
+; CHECK-NEXT: br i1 [[COND2]], label [[DOTBB2]], label [[DOTBB1:%.*]]
+; CHECK: .bb1:
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[A]]
+; CHECK-NEXT: [[OUT:%.*]] = urem i8 [[ADD]], [[N]]
+; CHECK-NEXT: ret i8 [[OUT]]
+; CHECK: .bb2:
+; CHECK-NEXT: ret i8 0
+;
+ %cond = icmp ult i8 %x, %n
+ br i1 %cond, label %.bb0, label %.bb2
+.bb0:
+ %cond2 = icmp ult i8 %a, %n
+ br i1 %cond2, label %.bb2, label %.bb1 ; Revert the condition
+.bb1:
+ %add = add i8 %x, %a
+ %out = urem i8 %add, %n
+ ret i8 %out
+.bb2:
+ ret i8 0
+}
+
declare void @llvm.assume(i1 noundef)
|
|
I'll fix my commit email address and reopen |
0f9ed0e
to
fdaef61
Compare
|
The CI failure looks unrelated (flang build failure). BTW, I think this patch should not be implemented in InstCombine. Generally, InstCombine canonicalizes patterns into a smaller one for better analysis results. BTW, this fold is not always profitable due to the following two reasons:
Therefore, I think you can implement this transformation in some target-dependent passes (e.g., CGP). |
Okay, I had implemented it there because just above where I added my code, there's optimization for |
Allows LLVM to optimize rem out of loops like this:
by replacing it with
m + 7 >= 600 ? m + 7 - 600 : m + 7
Alive2 proof: https://alive2.llvm.org/ce/z/NXHJJD
I added the equivalents of tests for
(X + 1) % Op1
, except forurem_with_dominating_condition
andurem_with_dominating_condition_false
which don't get matched by the pattern matcher and don't get optimized. I couldn't find a reason for this in my code, the reason is likely outside it. If I were to add these, these would be: