[DAGCombiner] Fold subtraction if above threshold to umin #134235

@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v

Author: Piotr Fusik (pfusik)

Changes

Folds patterns such as:

unsigned foo(unsigned x, unsigned y) {
    return x >= y ? x - y : x;
}

Before:

sltu a2, a0, a1
addi a2, a2, -1
and a1, a1, a2
subw a0, a0, a1

Or, with Zicond:

sltu a2, a0, a1
czero.nez a1, a1, a2
subw a0, a0, a1

After:

subw a1, a0, a1
minu a0, a0, a1

Only applies to unsigned comparisons.

Full diff: https://github.com/llvm/llvm-project/pull/134235.diff

3 Files Affected:
- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
- llvm/test/CodeGen/RISCV/rv32zbb.ll
- llvm/test/CodeGen/RISCV/rv64zbb.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8c409adedc2df..9e233af1661ab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14895,6 +14895,19 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
return V;
+ if (Subtarget.hasStdExtZbb()) {
+ // fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+ using namespace llvm::SDPatternMatch;
+ SDValue Y;
+ if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETULT)),
+ m_Zero(), m_Deferred(Y))))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::UMIN, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+ }
+ }
+
// fold (sub x, (select lhs, rhs, cc, 0, y)) ->
// (select lhs, rhs, cc, x, (sub x, y))
return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 90a8eadb3f974..50b198443b3a8 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1479,3 +1479,295 @@ entry:
%cmp = icmp ne i32 %popcnt, 1
ret i1 %cmp
}
+
+define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
+; RV32I-LABEL: sub_if_uge_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a2, a1
+; RV32I-NEXT: zext.b a3, a0
+; RV32I-NEXT: sltu a2, a3, a2
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i8:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: zext.b a2, a0
+; RV32ZBB-NEXT: sub a0, a0, a1
+; RV32ZBB-NEXT: zext.b a0, a0
+; RV32ZBB-NEXT: minu a0, a2, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i8 %x, %y
+ %select = select i1 %cmp, i8 0, i8 %y
+ %sub = sub nuw i8 %x, %select
+ ret i8 %sub
+}
+
+define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
+; RV32I-LABEL: sub_if_uge_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a3, a1, a2
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: sltu a2, a2, a3
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: zext.h a2, a0
+; RV32ZBB-NEXT: sub a0, a0, a1
+; RV32ZBB-NEXT: zext.h a0, a0
+; RV32ZBB-NEXT: minu a0, a2, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i16 %x, %y
+ %select = select i1 %cmp, i16 0, i16 %y
+ %sub = sub nuw i16 %x, %select
+ ret i16 %sub
+}
+
+define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
+; RV32I-LABEL: sub_if_uge_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sub a1, a0, a1
+; RV32ZBB-NEXT: minu a0, a0, a1
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ ret i32 %sub
+}
+
+define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
+; RV32I-LABEL: sub_if_uge_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB52_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: j .LBB52_3
+; RV32I-NEXT: .LBB52_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: .LBB52_3:
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a3, a4, a3
+; RV32I-NEXT: and a2, a4, a2
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: sub a1, a1, a4
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sltu a4, a0, a2
+; RV32ZBB-NEXT: sub a3, a1, a3
+; RV32ZBB-NEXT: sub a3, a3, a4
+; RV32ZBB-NEXT: sub a2, a0, a2
+; RV32ZBB-NEXT: beq a1, a3, .LBB52_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu a4, a1, a3
+; RV32ZBB-NEXT: beqz a4, .LBB52_3
+; RV32ZBB-NEXT: j .LBB52_4
+; RV32ZBB-NEXT: .LBB52_2:
+; RV32ZBB-NEXT: sltu a4, a0, a2
+; RV32ZBB-NEXT: bnez a4, .LBB52_4
+; RV32ZBB-NEXT: .LBB52_3:
+; RV32ZBB-NEXT: mv a0, a2
+; RV32ZBB-NEXT: mv a1, a3
+; RV32ZBB-NEXT: .LBB52_4:
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i64 %x, %y
+ %select = select i1 %cmp, i64 0, i64 %y
+ %sub = sub nuw i64 %x, %select
+ ret i64 %sub
+}
+
+define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
+; RV32I-LABEL: sub_if_uge_i128:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a7, 4(a2)
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw t0, 12(a2)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: beq a4, t0, .LBB53_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu t1, a4, t0
+; RV32I-NEXT: j .LBB53_3
+; RV32I-NEXT: .LBB53_2:
+; RV32I-NEXT: sltu t1, a5, a6
+; RV32I-NEXT: .LBB53_3:
+; RV32I-NEXT: lw a2, 0(a2)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: beq a3, a7, .LBB53_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: sltu t2, a3, a7
+; RV32I-NEXT: j .LBB53_6
+; RV32I-NEXT: .LBB53_5:
+; RV32I-NEXT: sltu t2, a1, a2
+; RV32I-NEXT: .LBB53_6:
+; RV32I-NEXT: xor t3, a4, t0
+; RV32I-NEXT: xor t4, a5, a6
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: beqz t3, .LBB53_8
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: mv t2, t1
+; RV32I-NEXT: .LBB53_8:
+; RV32I-NEXT: addi t2, t2, -1
+; RV32I-NEXT: and t1, t2, t0
+; RV32I-NEXT: and t0, t2, a2
+; RV32I-NEXT: and a7, t2, a7
+; RV32I-NEXT: sltu a2, a1, t0
+; RV32I-NEXT: and t2, t2, a6
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: beq a3, a7, .LBB53_10
+; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: sltu a6, a3, a7
+; RV32I-NEXT: .LBB53_10:
+; RV32I-NEXT: sub t3, a5, t2
+; RV32I-NEXT: sltu a5, a5, t2
+; RV32I-NEXT: sub a4, a4, t1
+; RV32I-NEXT: sub a3, a3, a7
+; RV32I-NEXT: sub a1, a1, t0
+; RV32I-NEXT: sltu a7, t3, a6
+; RV32I-NEXT: sub a4, a4, a5
+; RV32I-NEXT: sub a5, t3, a6
+; RV32I-NEXT: sub a3, a3, a2
+; RV32I-NEXT: sub a2, a4, a7
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a5, 8(a0)
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_i128:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: lw a7, 0(a2)
+; RV32ZBB-NEXT: lw t0, 4(a2)
+; RV32ZBB-NEXT: lw a5, 8(a2)
+; RV32ZBB-NEXT: lw a6, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw a3, 12(a1)
+; RV32ZBB-NEXT: lw a4, 0(a1)
+; RV32ZBB-NEXT: lw a1, 4(a1)
+; RV32ZBB-NEXT: sltu t1, a2, a5
+; RV32ZBB-NEXT: sub a6, a3, a6
+; RV32ZBB-NEXT: sltu t2, a4, a7
+; RV32ZBB-NEXT: sub a6, a6, t1
+; RV32ZBB-NEXT: mv t1, t2
+; RV32ZBB-NEXT: beq a1, t0, .LBB53_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu t1, a1, t0
+; RV32ZBB-NEXT: .LBB53_2:
+; RV32ZBB-NEXT: sub t3, a2, a5
+; RV32ZBB-NEXT: sltu a5, t3, t1
+; RV32ZBB-NEXT: sub a5, a6, a5
+; RV32ZBB-NEXT: sub a6, t3, t1
+; RV32ZBB-NEXT: beq a3, a5, .LBB53_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: sltu t1, a3, a5
+; RV32ZBB-NEXT: j .LBB53_5
+; RV32ZBB-NEXT: .LBB53_4:
+; RV32ZBB-NEXT: sltu t1, a2, a6
+; RV32ZBB-NEXT: .LBB53_5:
+; RV32ZBB-NEXT: sub t0, a1, t0
+; RV32ZBB-NEXT: sub t0, t0, t2
+; RV32ZBB-NEXT: sub a7, a4, a7
+; RV32ZBB-NEXT: beq a1, t0, .LBB53_7
+; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: sltu t2, a1, t0
+; RV32ZBB-NEXT: j .LBB53_8
+; RV32ZBB-NEXT: .LBB53_7:
+; RV32ZBB-NEXT: sltu t2, a4, a7
+; RV32ZBB-NEXT: .LBB53_8:
+; RV32ZBB-NEXT: xor t3, a3, a5
+; RV32ZBB-NEXT: xor t4, a2, a6
+; RV32ZBB-NEXT: or t3, t4, t3
+; RV32ZBB-NEXT: beqz t3, .LBB53_10
+; RV32ZBB-NEXT: # %bb.9:
+; RV32ZBB-NEXT: mv t2, t1
+; RV32ZBB-NEXT: .LBB53_10:
+; RV32ZBB-NEXT: bnez t2, .LBB53_12
+; RV32ZBB-NEXT: # %bb.11:
+; RV32ZBB-NEXT: mv a4, a7
+; RV32ZBB-NEXT: mv a1, t0
+; RV32ZBB-NEXT: mv a2, a6
+; RV32ZBB-NEXT: mv a3, a5
+; RV32ZBB-NEXT: .LBB53_12:
+; RV32ZBB-NEXT: sw a4, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a2, 8(a0)
+; RV32ZBB-NEXT: sw a3, 12(a0)
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i128 %x, %y
+ %select = select i1 %cmp, i128 0, i128 %y
+ %sub = sub nuw i128 %x, %select
+ ret i128 %sub
+}
+
+define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: sub_if_uge_multiuse_select_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: sll a0, a0, a1
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %shl = shl i32 %sub, %select
+ ret i32 %shl
+}
+
+define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
+; RV32I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a2, a2, a1
+; RV32I-NEXT: sub a2, a0, a2
+; RV32I-NEXT: bltu a0, a1, .LBB55_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB55_2:
+; RV32I-NEXT: li a0, 2
+; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: sub a2, a0, a1
+; RV32ZBB-NEXT: minu a2, a0, a2
+; RV32ZBB-NEXT: bltu a0, a1, .LBB55_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: li a0, 4
+; RV32ZBB-NEXT: sll a0, a2, a0
+; RV32ZBB-NEXT: ret
+; RV32ZBB-NEXT: .LBB55_2:
+; RV32ZBB-NEXT: li a0, 2
+; RV32ZBB-NEXT: sll a0, a2, a0
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %select2 = select i1 %cmp, i32 2, i32 4
+ %shl = shl i32 %sub, %select2
+ ret i32 %shl
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index d67db77c04a8e..49cf84e22e6c7 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1699,3 +1699,211 @@ entry:
%cmp = icmp eq i32 %popcnt, 1
ret i1 %cmp
}
+
+define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
+; RV64I-LABEL: sub_if_uge_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a2, a1
+; RV64I-NEXT: zext.b a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i8:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: zext.b a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: zext.b a0, a0
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i8 %x, %y
+ %select = select i1 %cmp, i8 0, i8 %y
+ %sub = sub nuw i8 %x, %select
+ ret i8 %sub
+}
+
+define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
+; RV64I-LABEL: sub_if_uge_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -1
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: sltu a2, a2, a3
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i16:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: zext.h a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: zext.h a0, a0
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i16 %x, %y
+ %select = select i1 %cmp, i16 0, i16 %y
+ %sub = sub nuw i16 %x, %select
+ ret i16 %sub
+}
+
+define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
+; RV64I-LABEL: sub_if_uge_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ ret i32 %sub
+}
+
+define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
+; RV64I-LABEL: sub_if_uge_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sub a1, a0, a1
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i64 %x, %y
+ %select = select i1 %cmp, i64 0, i64 %y
+ %sub = sub nuw i64 %x, %select
+ ret i64 %sub
+}
+
+define i128 @sub_if_uge_i128(i128 %x, i128 %y) {
+; RV64I-LABEL: sub_if_uge_i128:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beq a1, a3, .LBB66_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: sltu a4, a1, a3
+; RV64I-NEXT: j .LBB66_3
+; RV64I-NEXT: .LBB66_2:
+; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: .LBB66_3:
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a3, a4, a3
+; RV64I-NEXT: and a2, a4, a2
+; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: sub a1, a1, a4
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_i128:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sltu a4, a0, a2
+; RV64ZBB-NEXT: sub a3, a1, a3
+; RV64ZBB-NEXT: sub a3, a3, a4
+; RV64ZBB-NEXT: sub a2, a0, a2
+; RV64ZBB-NEXT: beq a1, a3, .LBB66_2
+; RV64ZBB-NEXT: # %bb.1:
+; RV64ZBB-NEXT: sltu a4, a1, a3
+; RV64ZBB-NEXT: beqz a4, .LBB66_3
+; RV64ZBB-NEXT: j .LBB66_4
+; RV64ZBB-NEXT: .LBB66_2:
+; RV64ZBB-NEXT: sltu a4, a0, a2
+; RV64ZBB-NEXT: bnez a4, .LBB66_4
+; RV64ZBB-NEXT: .LBB66_3:
+; RV64ZBB-NEXT: mv a0, a2
+; RV64ZBB-NEXT: mv a1, a3
+; RV64ZBB-NEXT: .LBB66_4:
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i128 %x, %y
+ %select = select i1 %cmp, i128 0, i128 %y
+ %sub = sub nuw i128 %x, %select
+ ret i128 %sub
+}
+
+define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
+; RV64I-LABEL: sub_if_uge_multiuse_select_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_select_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a1
+; RV64ZBB-NEXT: sext.w a3, a0
+; RV64ZBB-NEXT: sltu a2, a3, a2
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: and a1, a2, a1
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %shl = shl i32 %sub, %select
+ ret i32 %shl
+}
+
+define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
+; RV64I-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: sltu a4, a3, a2
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: and a1, a4, a1
+; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: bltu a3, a2, .LBB68_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: li a1, 4
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB68_2:
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a2, a1
+; RV64ZBB-NEXT: sext.w a3, a0
+; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: minu a0, a3, a0
+; RV64ZBB-NEXT: bltu a3, a2, .LBB68_2
+; RV64ZBB-NEXT: # %bb.1:
+; RV64ZBB-NEXT: li a1, 4
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
+; RV64ZBB-NEXT: .LBB68_2:
+; RV64ZBB-NEXT: li a1, 2
+; RV64ZBB-NEXT: sllw a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %select = select i1 %cmp, i32 0, i32 %y
+ %sub = sub nuw i32 %x, %select
+ %select2 = select i1 %cmp, i32 2, i32 4
+ %shl = shl i32 %sub, %select2
+ ret i32 %shl
+}
llvm/test/CodeGen/RISCV/rv32zbb.ll (Outdated)

; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: sub_if_uge_i64:
This pessimizes the i64 case: there is no 64-bit minu on RV32. How can this be restricted to legal types? I tried:
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14870,8 +14870,10 @@ static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
}
-static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
if (SDValue V = combineSubOfBoolean(N, DAG))
return V;
@@ -14895,7 +14897,7 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
return V;
- if (Subtarget.hasStdExtZbb()) {
+ if (Subtarget.hasStdExtZbb() && DCI.isAfterLegalizeDAG()) {
// fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
using namespace llvm::SDPatternMatch;
SDValue Y;
@@ -18733,7 +18735,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: {
if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
return V;
- return performSUBCombine(N, DAG, Subtarget);
+ return performSUBCombine(N, DCI, Subtarget);
}
case ISD::AND:
return performANDCombine(N, DCI, Subtarget);

but this blocks this transform entirely.
isTypeLegal(VT) is a member function.
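A minimal sketch of what that hint could look like inside performSUBCombine (an assumption about the shape of the fix, not the committed change):

// Hypothetical guard: only fold when the value type is legal for the
// target, so the i64-on-RV32 case (no 64-bit minu) is skipped. Note
// this alone would also skip i8/i16 before type legalization.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Subtarget.hasStdExtZbb() && TLI.isTypeLegal(N->getValueType(0))) {
  // ... the sd_match-based fold from this patch ...
}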
Instead of checking for Subtarget.hasStdExtZbb() you could check for TLI.isOperationLegal(ISD::UMIN, VT)? And then maybe you could move this into DAGCombiner.cpp. This seems like a generically useful transform
That fixes the >XLEN cases, but unfortunately also disables i8 and i16 transforms. And i32 on RV64.
You might also need to call TLI.getTypeToTransformTo for i8 + i16
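Combining both hints, a generic DAGCombiner.cpp version might gate on the type the value legalizes to. A sketch under those assumptions (TLI and N as available in visitSUB):

// Sketch: check UMIN legality on the type this VT will be transformed
// to, so narrow types that promote to a legal width still qualify
// (e.g. i8/i16, or i32 on RV64), while illegal wide types do not.
EVT VT = N->getValueType(0);
EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
if (!TLI.isOperationLegal(ISD::UMIN, TransformVT))
  return SDValue();
// ... match (sub x, (select (ult x, y), 0, y)) and rewrite to
// (umin x, (sub x, y)) as in the patch ...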
I found a solution that works. I'll try moving this to DAGCombiner.cpp.
This is my first DAGCombiner change. What's the policy for DAGCombiner tests? Specifically, shall I add test/CodeGen/* tests for all targets that are affected (have UMIN)?
> This is my first DAGCombiner change. What's the policy for DAGCombiner tests? Specifically, shall I add test/CodeGen/* tests for all targets that are affected (have UMIN)?
I don't think adding tests for all targets is necessary. I think it's sufficient as long as this new pattern has some coverage by any target
Build failed with:

I don't believe this is caused by my change.
return V;

if (Subtarget.hasStdExtZbb()) {
// fold (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
Is there something that prevents this from also appearing as (sub x, (select (uge y, x), y, 0))?
That's three differences:

- ult vs uge
- x, y vs y, x
- 0, y vs y, 0

Any two of them together would form a different spelling of the same pattern; all three at once (or just one) gives a different pattern. I can add more tests. Is there some canonical form, or a shortcut to match the different spellings?
Oops, I meant (sub x, (select (uge x, y), y, 0))? So only inverting the condition and swapping the select order.
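If that spelling ever needed direct matching, it would be a small variation of the patch's matcher. A hypothetical sketch (same rewrite, inverted condition code, swapped select arms):

// Sketch: match (sub x, (select (uge x, y), y, 0)) by accepting
// SETUGE with the select arms swapped; the rewrite is unchanged.
using namespace llvm::SDPatternMatch;
SDValue Y;
if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
                                           m_SpecificCondCode(ISD::SETUGE)),
                                   m_Deferred(Y), m_Zero())))) {
  SDLoc DL(N);
  return DAG.getNode(ISD::UMIN, DL, VT, N0,
                     DAG.getNode(ISD::SUB, DL, VT, N0, Y));
}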
InstCombine replaces your pattern with mine:
define dso_local signext i32 @mod(i32 noundef signext %x, i32 noundef signext %y) local_unnamed_addr #0 {
entry:
%cmp = icmp uge i32 %x, %y
%cond = select i1 %cmp, i32 %y, i32 0
%sub = sub i32 %x, %cond
ret i32 %sub
}
; *** IR Dump After InstCombinePass on mod ***
; Function Attrs: nounwind uwtable vscale_range(2,1024)
define dso_local signext i32 @mod(i32 noundef signext %x, i32 noundef signext %y) local_unnamed_addr #0 {
entry:
%cmp.not = icmp ult i32 %x, %y
%cond = select i1 %cmp.not, i32 0, i32 %y
%sub = sub i32 %x, %cond
ret i32 %sub
}
Rebasing should fix this.
Folds patterns such as:
unsigned foo(unsigned x, unsigned y) {
return x >= y ? x - y : x;
}
Before:
sltu a2, a0, a1
addi a2, a2, -1
and a1, a1, a2
subw a0, a0, a1
Or, with Zicond:
sltu a2, a0, a1
czero.nez a1, a1, a2
subw a0, a0, a1
After:
subw a1, a0, a1
minu a0, a0, a1
Only applies to unsigned comparisons.
If `x >= y` then `x - y` is less than or equal to `x`.
Otherwise, `x - y` wraps and is greater than `x`.
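That wrap-around argument is easy to sanity-check exhaustively at a narrow width. An illustrative stand-alone C++ harness (not part of the patch):

// Verifies x >= y ? x - y : x  ==  umin(x, x - y) for all u8 pairs.
// When x < y, x - y wraps to x - y + 256, which exceeds x, so the
// unsigned min picks x, matching the select.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t Ref = x >= y ? uint8_t(x - y) : uint8_t(x);
      uint8_t Min = std::min<uint8_t>(uint8_t(x), uint8_t(x - y));
      assert(Ref == Min);
    }
}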
Tests merged into main branch as e6e0f5f. PR rebased.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/81/builds/6170

Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/35/builds/9030

Here is the relevant piece of the build log for reference:
Follow-up #135194: Like #134235, but with a constant. It's a pattern in Adler-32 checksum calculation in zlib. Example:

unsigned adler32_mod(unsigned x) {
    return x >= 65521u ? x - 65521u : x;
}

Before, on RISC-V:

lui a1, 16
lui a2, 1048560
addiw a1, a1, -16
sltu a1, a1, a0
negw a1, a1
addi a2, a2, 15
and a1, a1, a2
addw a0, a0, a1

Or, with Zicond:

lui a1, 16
lui a2, 1048560
addiw a1, a1, -16
sltu a1, a1, a0
addi a2, a2, 15
czero.eqz a1, a2, a1
addw a0, a0, a1

After, with Zbb:

lui a1, 1048560
addi a1, a1, 15
addw a1, a0, a1
minu a0, a1, a0
This extends llvm#134235 to vectors.