[DAGCombiner] Fold subtraction if above threshold to umin
#134235
Conversation
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-risc-v

Author: Piotr Fusik (pfusik)

Changes

Folds patterns such as:

    unsigned foo(unsigned x, unsigned y) {
      return x >= y ? x - y : x;
    }

Before:

    sltu a2, a0, a1
    addi a2, a2, -1
    and  a1, a1, a2
    subw a0, a0, a1

Or, with Zicond:

    sltu      a2, a0, a1
    czero.nez a1, a1, a2
    subw      a0, a0, a1

After:

    subw a1, a0, a1
    minu a0, a0, a1

Only applies to unsigned comparisons.

Full diff: https://github.com/llvm/llvm-project/pull/134235.diff

3 Files Affected:
- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
- llvm/test/CodeGen/RISCV/rv32zbb.ll
- llvm/test/CodeGen/RISCV/rv64zbb.ll
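As a quick sanity check of the fold (a standalone sketch, not part of this patch or its tests), the select form and the umin form can be compared exhaustively over 8-bit operands; the ternary on the wrapped difference stands in for umin:

    // Standalone check (illustrative only): exhaustively verifies that
    //   x >= y ? x - y : x   ==   umin(x, x - y)
    // for all 8-bit unsigned pairs, where x - y wraps modulo 256.
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      for (unsigned x = 0; x < 256; ++x) {
        for (unsigned y = 0; y < 256; ++y) {
          uint8_t before = x >= y ? (uint8_t)(x - y) : (uint8_t)x;
          uint8_t diff = (uint8_t)(x - y);                        // wrapping subtraction
          uint8_t after = diff < (uint8_t)x ? diff : (uint8_t)x;  // umin(x, x - y)
          assert(before == after);
        }
      }
      puts("OK: the select form and the umin form agree on all i8 inputs");
      return 0;
    }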
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8c409adedc2df..9e233af1661ab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14895,6 +14895,19 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
return V;
+ if (Subtarget.hasStdExtZbb()) {
+ // fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+ using namespace llvm::SDPatternMatch;
+ SDValue Y;
+ if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETULT)),
+ m_Zero(), m_Deferred(Y))))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::UMIN, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+ }
+ }
+
// fold (sub x, (select lhs, rhs, cc, 0, y)) ->
// (select lhs, rhs, cc, x, (sub x, y))
return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 90a8eadb3f974..50b198443b3a8 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1479,3 +1479,295 @@ entry:
%cmp = icmp ne i32 %popcnt, 1
ret i1 %cmp
}
+
+define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
+; RV32I-LABEL: sub_if_uge_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a2, a1
+; RV32I-NEXT: zext.b a3, a0
+; RV32I-NEXT: sltu a2, a3, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i8:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: zext.b a2, a0
+; RV32ZBB-NEXT: sub a0, a0, a1
+; RV32ZBB-NEXT: zext.b a0, a0
+; RV32ZBB-NEXT: minu a0, a2, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i8 %x, %y
+ %select = select i1 %cmp, i8 0, i8 %y
+ %sub = sub nuw i8 %x, %select
+ ret i8 %sub
+}
+
+define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
+; RV32I-LABEL: sub_if_uge_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a3, a1, a2
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sltu a2, a2, a3
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: zext.h a2, a0
+; RV32ZBB-NEXT: sub a0, a0, a1
+; RV32ZBB-NEXT: zext.h a0, a0
+; RV32ZBB-NEXT: minu a0, a2, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i16 %x, %y
+ %select = select i1 %cmp, i16 0, i16 %y
+ %sub = sub nuw i16 %x, %select
+ ret i16 %sub
+}
+
+define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
+; RV32I-LABEL: sub_if_uge_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sub a1, a0, a1
+; RV32ZBB-NEXT: minu a0, a0, a1
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ ret i32 %sub
+}
+
+define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
+; RV32I-LABEL: sub_if_uge_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB52_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: j .LBB52_3
+; RV32I-NEXT: .LBB52_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: .LBB52_3:
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a3, a4, a3
+; RV32I-NEXT: and a2, a4, a2
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: sub a1, a1, a4
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sltu a4, a0, a2
+; RV32ZBB-NEXT: sub a3, a1, a3
+; RV32ZBB-NEXT: sub a3, a3, a4
+; RV32ZBB-NEXT: sub a2, a0, a2
+; RV32ZBB-NEXT: beq a1, a3, .LBB52_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu a4, a1, a3
+; RV32ZBB-NEXT: beqz a4, .LBB52_3
+; RV32ZBB-NEXT: j .LBB52_4
+; RV32ZBB-NEXT: .LBB52_2:
+; RV32ZBB-NEXT: sltu a4, a0, a2
+; RV32ZBB-NEXT: bnez a4, .LBB52_4
+; RV32ZBB-NEXT: .LBB52_3:
+; RV32ZBB-NEXT: mv a0, a2
+; RV32ZBB-NEXT: mv a1, a3
+; RV32ZBB-NEXT: .LBB52_4:
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i64 %x, %y
+ %select = select i1 %cmp, i64 0, i64 %y
+ %sub = sub nuw i64 %x, %select
+ ret i64 %sub
+}
+
+define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
+; RV32I-LABEL: sub_if_uge_i128:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a7, 4(a2)
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw t0, 12(a2)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: beq a4, t0, .LBB53_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu t1, a4, t0
+; RV32I-NEXT: j .LBB53_3
+; RV32I-NEXT: .LBB53_2:
+; RV32I-NEXT: sltu t1, a5, a6
+; RV32I-NEXT: .LBB53_3:
+; RV32I-NEXT: lw a2, 0(a2)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: beq a3, a7, .LBB53_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: sltu t2, a3, a7
+; RV32I-NEXT: j .LBB53_6
+; RV32I-NEXT: .LBB53_5:
+; RV32I-NEXT: sltu t2, a1, a2
+; RV32I-NEXT: .LBB53_6:
+; RV32I-NEXT: xor t3, a4, t0
+; RV32I-NEXT: xor t4, a5, a6
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: beqz t3, .LBB53_8
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: mv t2, t1
+; RV32I-NEXT: .LBB53_8:
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: and t1, t2, t0
+; RV32I-NEXT: and t0, t2, a2
+; RV32I-NEXT: and a7, t2, a7
+; RV32I-NEXT: sltu a2, a1, t0
+; RV32I-NEXT: and t2, t2, a6
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: beq a3, a7, .LBB53_10
+; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: sltu a6, a3, a7
+; RV32I-NEXT: .LBB53_10:
+; RV32I-NEXT: sub t3, a5, t2
+; RV32I-NEXT: sltu a5, a5, t2
+; RV32I-NEXT: sub a4, a4, t1
+; RV32I-NEXT: sub a3, a3, a7
+; RV32I-NEXT: sub a1, a1, t0
+; RV32I-NEXT: sltu a7, t3, a6
+; RV32I-NEXT: sub a4, a4, a5
+; RV32I-NEXT: sub a5, t3, a6
+; RV32I-NEXT: sub a3, a3, a2
+; RV32I-NEXT: sub a2, a4, a7
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a5, 8(a0)
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i128:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: lw a7, 0(a2)
+; RV32ZBB-NEXT: lw t0, 4(a2)
+; RV32ZBB-NEXT: lw a5, 8(a2)
+; RV32ZBB-NEXT: lw a6, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw a3, 12(a1)
+; RV32ZBB-NEXT: lw a4, 0(a1)
+; RV32ZBB-NEXT: lw a1, 4(a1)
+; RV32ZBB-NEXT: sltu t1, a2, a5
+; RV32ZBB-NEXT: sub a6, a3, a6
+; RV32ZBB-NEXT: sltu t2, a4, a7
+; RV32ZBB-NEXT: sub a6, a6, t1
+; RV32ZBB-NEXT: mv t1, t2
+; RV32ZBB-NEXT: beq a1, t0, .LBB53_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu t1, a1, t0
+; RV32ZBB-NEXT: .LBB53_2:
+; RV32ZBB-NEXT: sub t3, a2, a5
+; RV32ZBB-NEXT: sltu a5, t3, t1
+; RV32ZBB-NEXT: sub a5, a6, a5
+; RV32ZBB-NEXT: sub a6, t3, t1
+; RV32ZBB-NEXT: beq a3, a5, .LBB53_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: sltu t1, a3, a5
+; RV32ZBB-NEXT: j .LBB53_5
+; RV32ZBB-NEXT: .LBB53_4:
+; RV32ZBB-NEXT: sltu t1, a2, a6
+; RV32ZBB-NEXT: .LBB53_5:
+; RV32ZBB-NEXT: sub t0, a1, t0
+; RV32ZBB-NEXT: sub t0, t0, t2
+; RV32ZBB-NEXT: sub a7, a4, a7
+; RV32ZBB-NEXT: beq a1, t0, .LBB53_7
+; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: sltu t2, a1, t0
+; RV32ZBB-NEXT: j .LBB53_8
+; RV32ZBB-NEXT: .LBB53_7:
+; RV32ZBB-NEXT: sltu t2, a4, a7
+; RV32ZBB-NEXT: .LBB53_8:
+; RV32ZBB-NEXT: xor t3, a3, a5
+; RV32ZBB-NEXT: xor t4, a2, a6
+; RV32ZBB-NEXT: or t3, t4, t3
+; RV32ZBB-NEXT: beqz t3, .LBB53_10
+; RV32ZBB-NEXT: # %bb.9:
+; RV32ZBB-NEXT: mv t2, t1
+; RV32ZBB-NEXT: .LBB53_10:
+; RV32ZBB-NEXT: bnez t2, .LBB53_12
+; RV32ZBB-NEXT: # %bb.11:
+; RV32ZBB-NEXT: mv a4, a7
+; RV32ZBB-NEXT: mv a1, t0
+; RV32ZBB-NEXT: mv a2, a6
+; RV32ZBB-NEXT: mv a3, a5
+; RV32ZBB-NEXT: .LBB53_12:
+; RV32ZBB-NEXT: sw a4, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a2, 8(a0)
+; RV32ZBB-NEXT: sw a3, 12(a0)
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i128 %x, %y
+ %select = select i1 %cmp, i128 0, i128 %y
+ %sub = sub nuw i128 %x, %select
+ ret i128 %sub
+}
+
+define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: sub_if_uge_multiuse_select_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: sll a0, a0, a1
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %shl = shl i32 %sub, %select
+ ret i32 %shl
+}
+
+define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, a1
+; RV32I-NEXT: sub a2, a0, a2
+; RV32I-NEXT: bltu a0, a1, .LBB55_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB55_2:
+; RV32I-NEXT: li a0, 2
+; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sub a2, a0, a1
+; RV32ZBB-NEXT: minu a2, a0, a2
+; RV32ZBB-NEXT: bltu a0, a1, .LBB55_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: li a0, 4
+; RV32ZBB-NEXT: sll a0, a2, a0
+; RV32ZBB-NEXT: ret
+; RV32ZBB-NEXT: .LBB55_2:
+; RV32ZBB-NEXT: li a0, 2
+; RV32ZBB-NEXT: sll a0, a2, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %select2 = select i1 %cmp, i32 2, i32 4
+ %shl = shl i32 %sub, %select2
+ ret i32 %shl
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index d67db77c04a8e..49cf84e22e6c7 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1699,3 +1699,211 @@ entry:
%cmp = icmp eq i32 %popcnt, 1
ret i1 %cmp
}
+
+define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
+; RV64I-LABEL: sub_if_uge_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a2, a1
+; RV64I-NEXT: zext.b a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i8:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: zext.b a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: zext.b a0, a0
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i8 %x, %y
+ %select = select i1 %cmp, i8 0, i8 %y
+ %sub = sub nuw i8 %x, %select
+ ret i8 %sub
+}
+
+define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
+; RV64I-LABEL: sub_if_uge_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -1
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: sltu a2, a2, a3
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i16:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: zext.h a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: zext.h a0, a0
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i16 %x, %y
+ %select = select i1 %cmp, i16 0, i16 %y
+ %sub = sub nuw i16 %x, %select
+ ret i16 %sub
+}
+
+define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
+; RV64I-LABEL: sub_if_uge_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ ret i32 %sub
+}
+
+define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
+; RV64I-LABEL: sub_if_uge_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sub a1, a0, a1
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i64 %x, %y
+ %select = select i1 %cmp, i64 0, i64 %y
+ %sub = sub nuw i64 %x, %select
+ ret i64 %sub
+}
+
+define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
+; RV64I-LABEL: sub_if_uge_i128:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beq a1, a3, .LBB66_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: sltu a4, a1, a3
+; RV64I-NEXT: j .LBB66_3
+; RV64I-NEXT: .LBB66_2:
+; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: .LBB66_3:
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a3, a4, a3
+; RV64I-NEXT: and a2, a4, a2
+; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: sub a1, a1, a4
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i128:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sltu a4, a0, a2
+; RV64ZBB-NEXT: sub a3, a1, a3
+; RV64ZBB-NEXT: sub a3, a3, a4
+; RV64ZBB-NEXT: sub a2, a0, a2
+; RV64ZBB-NEXT: beq a1, a3, .LBB66_2
+; RV64ZBB-NEXT: # %bb.1:
+; RV64ZBB-NEXT: sltu a4, a1, a3
+; RV64ZBB-NEXT: beqz a4, .LBB66_3
+; RV64ZBB-NEXT: j .LBB66_4
+; RV64ZBB-NEXT: .LBB66_2:
+; RV64ZBB-NEXT: sltu a4, a0, a2
+; RV64ZBB-NEXT: bnez a4, .LBB66_4
+; RV64ZBB-NEXT: .LBB66_3:
+; RV64ZBB-NEXT: mv a0, a2
+; RV64ZBB-NEXT: mv a1, a3
+; RV64ZBB-NEXT: .LBB66_4:
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i128 %x, %y
+ %select = select i1 %cmp, i128 0, i128 %y
+ %sub = sub nuw i128 %x, %select
+ ret i128 %sub
+}
+
+define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
+; RV64I-LABEL: sub_if_uge_multiuse_select_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_select_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a1
+; RV64ZBB-NEXT: sext.w a3, a0
+; RV64ZBB-NEXT: sltu a2, a3, a2
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: and a1, a2, a1
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %shl = shl i32 %sub, %select
+ ret i32 %shl
+}
+
+define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a4, a3, a2
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a1, a4, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: bltu a3, a2, .LBB68_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: li a1, 4
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB68_2:
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a1
+; RV64ZBB-NEXT: sext.w a3, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: minu a0, a3, a0
+; RV64ZBB-NEXT: bltu a3, a2, .LBB68_2
+; RV64ZBB-NEXT: # %bb.1:
+; RV64ZBB-NEXT: li a1, 4
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
+; RV64ZBB-NEXT: .LBB68_2:
+; RV64ZBB-NEXT: li a1, 2
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %select2 = select i1 %cmp, i32 2, i32 4
+ %shl = shl i32 %sub, %select2
+ ret i32 %shl
+}
llvm/test/CodeGen/RISCV/rv32zbb.ll (outdated)

; RV32I-NEXT:    sub a0, a0, a2
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: sub_if_uge_i64:
This pessimizes: there is no 64-bit `minu` on RV32. How to restrict to legal types?
Tried:
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14870,8 +14870,10 @@ static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
}
-static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
if (SDValue V = combineSubOfBoolean(N, DAG))
return V;
@@ -14895,7 +14897,7 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
return V;
- if (Subtarget.hasStdExtZbb()) {
+ if (Subtarget.hasStdExtZbb() && DCI.isAfterLegalizeDAG()) {
// fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
using namespace llvm::SDPatternMatch;
SDValue Y;
@@ -18733,7 +18735,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: {
if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
return V;
- return performSUBCombine(N, DAG, Subtarget);
+ return performSUBCombine(N, DCI, Subtarget);
}
case ISD::AND:
return performANDCombine(N, DCI, Subtarget);
but this blocks this transform entirely. `isTypeLegal(VT)` is a member function.
Instead of checking for `Subtarget.hasStdExtZbb()` you could check for `TLI.isOperationLegal(ISD::UMIN, VT)`? And then maybe you could move this into DAGCombiner.cpp. This seems like a generically useful transform.
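For illustration, a minimal sketch of what that generically guarded version might look like, reusing the `SDPatternMatch` pattern from this PR. The guard and placement are assumptions, not the code that eventually landed; the names follow the usual DAGCombiner conventions (`TLI`, `DAG`, `N0`, `N1`):

    // Hypothetical placement in DAGCombiner's SUB combine, guarded by UMIN
    // legality instead of a Zbb check. Sketch only; the committed form may differ.
    // fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
    if (TLI.isOperationLegal(ISD::UMIN, VT)) {
      using namespace llvm::SDPatternMatch;
      SDValue Y;
      if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
                                                 m_SpecificCondCode(ISD::SETULT)),
                                         m_Zero(), m_Deferred(Y))))) {
        SDLoc DL(N);
        return DAG.getNode(ISD::UMIN, DL, VT, N0,
                           DAG.getNode(ISD::SUB, DL, VT, N0, Y));
      }
    }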
That fixes the >XLEN cases, but unfortunately also disables the i8 and i16 transforms, and i32 on RV64.
You might also need to call `TLI.getTypeToTransformTo` for i8 + i16.
I found a solution that works. I'll try moving this to DAGCombiner.cpp.
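One plausible shape for such a check (an assumption for illustration; the version that actually landed may differ) is to also accept a type whose promoted form has a legal UMIN, which would cover i8/i16 and i32 on RV64 while still rejecting i64 on RV32:

    // Illustrative legality helper only. getTypeToTransformTo reports the type
    // the legalizer turns VT into (e.g. i8 -> i32 on RISC-V), so promoted
    // narrow types are accepted while expanded wide types (i64 on RV32) are not.
    auto hasLegalUMin = [&](EVT VT) {
      if (TLI.isOperationLegal(ISD::UMIN, VT))
        return true;
      EVT PromotedVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
      return PromotedVT.isInteger() && PromotedVT.bitsGT(VT) &&
             TLI.isOperationLegal(ISD::UMIN, PromotedVT);
    };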
This is my first DAGCombiner change. What's the policy for DAGCombiner tests? Specifically, shall I add test/CodeGen/* tests for all targets that are affected (have UMIN)?
> This is my first DAGCombiner change. What's the policy for DAGCombiner tests? Specifically, shall I add test/CodeGen/* tests for all targets that are affected (have UMIN)?

I don't think adding tests for all targets is necessary. I think it's sufficient as long as this new pattern has some coverage on at least one target.
Build failed with:

I don't believe this is caused by my change.
@@ -14895,6 +14895,19 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
 return V;

+ if (Subtarget.hasStdExtZbb()) {
+ // fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
Is there something that prevents this from also appearing as (sub x, (select (uge y, x), y, 0))?
That's three differences:
- ult vs uge
- x, y vs y, x
- 0, y vs y, 0
Any two would form a different spelling of the same pattern. I can add more tests. Is there some canonical form or shortcuts to match different spellings?
All three at once (or one) is a different pattern.
Oops, I meant (sub x, (select (uge x, y), y, 0))? So only inverting the condition and swapping the select order.
InstCombine replaces your pattern with mine:
define dso_local signext i32 @mod(i32 noundef signext %x, i32 noundef signext %y) local_unnamed_addr #0 {
entry:
%cmp = icmp uge i32 %x, %y
%cond = select i1 %cmp, i32 %y, i32 0
%sub = sub i32 %x, %cond
ret i32 %sub
}
; *** IR Dump After InstCombinePass on mod ***
; Function Attrs: nounwind uwtable vscale_range(2,1024)
define dso_local signext i32 @mod(i32 noundef signext %x, i32 noundef signext %y) local_unnamed_addr #0 {
entry:
%cmp.not = icmp ult i32 %x, %y
%cond = select i1 %cmp.not, i32 0, i32 %y
%sub = sub i32 %x, %cond
ret i32 %sub
}
minu / umin

Rebasing should fix this.
LGTM thanks
Folds patterns such as:

    unsigned foo(unsigned x, unsigned y) {
      return x >= y ? x - y : x;
    }

Before:

    sltu a2, a0, a1
    addi a2, a2, -1
    and  a1, a1, a2
    subw a0, a0, a1

Or, with Zicond:

    sltu      a2, a0, a1
    czero.nez a1, a1, a2
    subw      a0, a0, a1

After:

    subw a1, a0, a1
    minu a0, a0, a1

Only applies to unsigned comparisons. If `x >= y` then `x - y` is less than or equal to `x`. Otherwise, `x - y` wraps and is greater than `x`.
Tests merged into main branch as e6e0f5f. PR rebased.
LGTM
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/81/builds/6170

Here is the relevant piece of the build log for the reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/35/builds/9030

Here is the relevant piece of the build log for the reference:
A follow-up change (#135194) referenced this pull request: like #134235, but with a constant. It's a pattern in the Adler-32 checksum calculation in zlib. Example:

    unsigned adler32_mod(unsigned x) {
      return x >= 65521u ? x - 65521u : x;
    }

Before, on RISC-V:

    lui   a1, 16
    lui   a2, 1048560
    addiw a1, a1, -16
    sltu  a1, a1, a0
    negw  a1, a1
    addi  a2, a2, 15
    and   a1, a1, a2
    addw  a0, a0, a1

Or, with Zicond:

    lui       a1, 16
    lui       a2, 1048560
    addiw     a1, a1, -16
    sltu      a1, a1, a0
    addi      a2, a2, 15
    czero.eqz a1, a2, a1
    addw      a0, a0, a1

After, with Zbb:

    lui  a1, 1048560
    addi a1, a1, 15
    addw a1, a0, a1
    minu a0, a1, a0
Folds patterns such as:

    unsigned foo(unsigned x, unsigned y) {
      return x >= y ? x - y : x;
    }

Before, on RISC-V:

    sltu a2, a0, a1
    addi a2, a2, -1
    and  a1, a1, a2
    subw a0, a0, a1

Or, with Zicond:

    sltu      a2, a0, a1
    czero.nez a1, a1, a2
    subw      a0, a0, a1

After, with Zbb:

    subw a1, a0, a1
    minu a0, a0, a1

Only applies to unsigned comparisons. If `x >= y` then `x - y` is less than or equal to `x`. Otherwise, `x - y` wraps and is greater than `x`.
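For a concrete illustration of that argument with 8-bit values (chosen here only for exposition): if x = 200 and y = 10, then x - y = 190 <= 200, so umin(x, x - y) selects the subtraction; if x = 10 and y = 200, the difference wraps to 66 > 10, so umin leaves x unchanged.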