[DAGCombiner] Fold subtraction if above a constant threshold to `umin` #135194

pfusik · 2025-04-10T14:59:46Z

Like #134235, but with a constant.
It's a pattern in Adler-32 checksum calculation in zlib.

Example:

unsigned adler32_mod(unsigned x) {
  return x >= 65521u ? x - 65521u : x;
}

Before, on RISC-V:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
negw    a1, a1
addi    a2, a2, 15
and     a1, a1, a2
addw    a0, a0, a1

Or, with Zicond:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
addi    a2, a2, 15
czero.eqz  a1, a2, a1
addw    a0, a0, a1

After, with Zbb:

lui     a1, 1048560
addi    a1, a1, 15
addw    a1, a0, a1
minu    a0, a1, a0

llvmbot · 2025-04-10T15:00:23Z

@llvm/pr-subscribers-llvm-selectiondag

Author: Piotr Fusik (pfusik)

Changes

Like #134235, but with a constant.
It's used in Adler-32 checksum calculation in zlib.

Example:

unsigned adler32_mod(unsigned x) {
  return x >= 65521u ? x - 65521u : x;
}

Before, on RISC-V:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
negw    a1, a1
addi    a2, a2, 15
and     a1, a1, a2
addw    a0, a0, a1

Or, with Zicond:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
addi    a2, a2, 15
czero.eqz  a1, a2, a1
addw    a0, a0, a1

After, with Zbb:

lui a1, 1048560
addi a1, a1, 15
addw a1, a0, a1
minu a0, a1, a0

Full diff: https://github.com/llvm/llvm-project/pull/135194.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+14-4)
(modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+163)
(modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+162)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9a28caa758659..46ab342d6f899 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -845,6 +845,13 @@ namespace {
       return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
     }
 
+    bool hasUMin(EVT VT) const {
+      auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
+      return (LK.first == TargetLoweringBase::TypeLegal ||
+              LK.first == TargetLoweringBase::TypePromoteInteger) &&
+        TLI.isOperationLegal(ISD::UMIN, LK.second);
+    }
+
   public:
     /// Runs the dag combiner on all nodes in the work list
     void Run(CombineLevel AtLevel);
@@ -4253,10 +4260,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
 
   // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
   // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
-  auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
-  if ((LK.first == TargetLoweringBase::TypeLegal ||
-       LK.first == TargetLoweringBase::TypePromoteInteger) &&
-      TLI.isOperationLegal(ISD::UMIN, LK.second)) {
+  if (hasUMin(VT)) {
     SDValue Y;
     if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
                                                m_SpecificCondCode(ISD::SETULT)),
@@ -12074,6 +12078,12 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
 
     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
       return NewSel;
+
+    // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
+    APInt C;
+    if (CC == ISD::SETUGT && Cond0 == N2 && sd_match(Cond1, m_ConstInt(C)) &&
+        sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C))) && hasUMin(VT))
+      return DAG.getNode(ISD::UMIN, DL, VT, N1, N2);
   }
 
   if (!VT.isVector())
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 5afc5ecb44098..9c84d5c18e8f7 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1718,3 +1718,166 @@ define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
   %sub = sub nuw i32 %x, %select
   ret i32 %sub
 }
+
+define i8 @sub_if_uge_C_i8(i8 zeroext %x) {
+; RV32I-LABEL: sub_if_uge_C_i8:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltiu a1, a0, 13
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    andi a1, a1, -13
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_i8:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    addi a1, a0, -13
+; RV32ZBB-NEXT:    zext.b a1, a1
+; RV32ZBB-NEXT:    minu a0, a1, a0
+; RV32ZBB-NEXT:    ret
+  %cmp = icmp ugt i8 %x, 12
+  %sub = add i8 %x, -13
+  %conv4 = select i1 %cmp, i8 %sub, i8 %x
+  ret i8 %conv4
+}
+
+define i16 @sub_if_uge_C_i16(i16 zeroext %x) {
+; RV32I-LABEL: sub_if_uge_C_i16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltiu a1, a0, 251
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    andi a1, a1, -251
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    addi a1, a0, -251
+; RV32ZBB-NEXT:    zext.h a1, a1
+; RV32ZBB-NEXT:    minu a0, a1, a0
+; RV32ZBB-NEXT:    ret
+  %cmp = icmp ugt i16 %x, 250
+  %sub = add i16 %x, -251
+  %conv4 = select i1 %cmp, i16 %sub, i16 %x
+  ret i16 %conv4
+}
+
+define i32 @sub_if_uge_C_i32(i32 signext %x) {
+; RV32I-LABEL: sub_if_uge_C_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 16
+; RV32I-NEXT:    lui a2, 1048560
+; RV32I-NEXT:    addi a1, a1, -16
+; RV32I-NEXT:    sltu a1, a1, a0
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    addi a2, a2, 15
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    lui a1, 1048560
+; RV32ZBB-NEXT:    addi a1, a1, 15
+; RV32ZBB-NEXT:    add a1, a0, a1
+; RV32ZBB-NEXT:    minu a0, a1, a0
+; RV32ZBB-NEXT:    ret
+  %cmp = icmp ugt i32 %x, 65520
+  %sub = add i32 %x, -65521
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  ret i32 %cond
+}
+
+define i64 @sub_if_uge_C_i64(i64 %x) {
+; CHECK-LABEL: sub_if_uge_C_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    beq a1, a2, .LBB60_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    sltiu a2, a1, 2
+; CHECK-NEXT:    xori a2, a2, 1
+; CHECK-NEXT:    j .LBB60_3
+; CHECK-NEXT:  .LBB60_2:
+; CHECK-NEXT:    lui a2, 172127
+; CHECK-NEXT:    addi a2, a2, 511
+; CHECK-NEXT:    sltu a2, a2, a0
+; CHECK-NEXT:  .LBB60_3:
+; CHECK-NEXT:    neg a2, a2
+; CHECK-NEXT:    andi a3, a2, -2
+; CHECK-NEXT:    add a1, a1, a3
+; CHECK-NEXT:    lui a3, 876449
+; CHECK-NEXT:    addi a3, a3, -512
+; CHECK-NEXT:    and a2, a2, a3
+; CHECK-NEXT:    add a2, a0, a2
+; CHECK-NEXT:    sltu a0, a2, a0
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:    ret
+  %cmp = icmp ugt i64 %x, 4999999999
+  %sub = add i64 %x, -5000000000
+  %cond = select i1 %cmp, i64 %sub, i64 %x
+  ret i64 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
+; RV32I-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    lui a3, 1048560
+; RV32I-NEXT:    addi a2, a2, -16
+; RV32I-NEXT:    sltu a2, a2, a0
+; RV32I-NEXT:    neg a4, a2
+; RV32I-NEXT:    addi a3, a3, 15
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    add a0, a0, a3
+; RV32I-NEXT:    sw a2, 0(a1)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    lui a2, 16
+; RV32ZBB-NEXT:    lui a3, 1048560
+; RV32ZBB-NEXT:    addi a2, a2, -16
+; RV32ZBB-NEXT:    addi a3, a3, 15
+; RV32ZBB-NEXT:    sltu a2, a2, a0
+; RV32ZBB-NEXT:    add a3, a0, a3
+; RV32ZBB-NEXT:    minu a0, a3, a0
+; RV32ZBB-NEXT:    sw a2, 0(a1)
+; RV32ZBB-NEXT:    ret
+  %cmp = icmp ugt i32 %x, 65520
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, ptr %z, align 4
+  %sub = add i32 %x, -65521
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  ret i32 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
+; RV32I-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 1048560
+; RV32I-NEXT:    lui a3, 16
+; RV32I-NEXT:    addi a2, a2, 15
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    addi a3, a3, -16
+; RV32I-NEXT:    sw a2, 0(a1)
+; RV32I-NEXT:    bltu a3, a0, .LBB62_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:  .LBB62_2:
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    lui a2, 1048560
+; RV32ZBB-NEXT:    addi a2, a2, 15
+; RV32ZBB-NEXT:    add a2, a0, a2
+; RV32ZBB-NEXT:    minu a0, a2, a0
+; RV32ZBB-NEXT:    sw a2, 0(a1)
+; RV32ZBB-NEXT:    ret
+  %sub = add i32 %x, -65521
+  store i32 %sub, ptr %z, align 4
+  %cmp = icmp ugt i32 %x, 65520
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  ret i32 %cond
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 2ae10da967754..8a0743cd762a7 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1884,3 +1884,165 @@ define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 signext %x, i32 signext %y, pt
   %sub = sub nuw i32 %x, %select
   ret i32 %sub
 }
+
+define i8 @sub_if_uge_C_i8(i8 zeroext %x) {
+; RV64I-LABEL: sub_if_uge_C_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltiu a1, a0, 13
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    andi a1, a1, -13
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    addi a1, a0, -13
+; RV64ZBB-NEXT:    zext.b a1, a1
+; RV64ZBB-NEXT:    minu a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i8 %x, 12
+  %sub = add i8 %x, -13
+  %conv4 = select i1 %cmp, i8 %sub, i8 %x
+  ret i8 %conv4
+}
+
+define i16 @sub_if_uge_C_i16(i16 zeroext %x) {
+; RV64I-LABEL: sub_if_uge_C_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltiu a1, a0, 251
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    andi a1, a1, -251
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    addi a1, a0, -251
+; RV64ZBB-NEXT:    zext.h a1, a1
+; RV64ZBB-NEXT:    minu a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i16 %x, 250
+  %sub = add i16 %x, -251
+  %conv4 = select i1 %cmp, i16 %sub, i16 %x
+  ret i16 %conv4
+}
+
+define i32 @sub_if_uge_C_i32(i32 signext %x) {
+; RV64I-LABEL: sub_if_uge_C_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 16
+; RV64I-NEXT:    lui a2, 1048560
+; RV64I-NEXT:    addiw a1, a1, -16
+; RV64I-NEXT:    sltu a1, a1, a0
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    addi a2, a2, 15
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 1048560
+; RV64ZBB-NEXT:    addi a1, a1, 15
+; RV64ZBB-NEXT:    addw a1, a0, a1
+; RV64ZBB-NEXT:    minu a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i32 %x, 65520
+  %sub = add i32 %x, -65521
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  ret i32 %cond
+}
+
+define i64 @sub_if_uge_C_i64(i64 %x) {
+; RV64I-LABEL: sub_if_uge_C_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 298
+; RV64I-NEXT:    lui a2, 1046192
+; RV64I-NEXT:    addiw a1, a1, 95
+; RV64I-NEXT:    addiw a2, a2, -761
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 511
+; RV64I-NEXT:    sltu a1, a1, a0
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    slli a2, a2, 9
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 1046192
+; RV64ZBB-NEXT:    addiw a1, a1, -761
+; RV64ZBB-NEXT:    slli a1, a1, 9
+; RV64ZBB-NEXT:    add a1, a0, a1
+; RV64ZBB-NEXT:    minu a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i64 %x, 4999999999
+  %sub = add i64 %x, -5000000000
+  %cond = select i1 %cmp, i64 %sub, i64 %x
+  ret i64 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
+; RV64I-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    lui a3, 1048560
+; RV64I-NEXT:    addiw a2, a2, -16
+; RV64I-NEXT:    sltu a2, a2, a0
+; RV64I-NEXT:    negw a4, a2
+; RV64I-NEXT:    addi a3, a3, 15
+; RV64I-NEXT:    and a3, a4, a3
+; RV64I-NEXT:    addw a0, a0, a3
+; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_multiuse_cmp_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a2, 16
+; RV64ZBB-NEXT:    lui a3, 1048560
+; RV64ZBB-NEXT:    addiw a2, a2, -16
+; RV64ZBB-NEXT:    addi a3, a3, 15
+; RV64ZBB-NEXT:    sltu a2, a2, a0
+; RV64ZBB-NEXT:    addw a3, a0, a3
+; RV64ZBB-NEXT:    minu a0, a3, a0
+; RV64ZBB-NEXT:    sw a2, 0(a1)
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i32 %x, 65520
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, ptr %z, align 4
+  %sub = add i32 %x, -65521
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  ret i32 %cond
+}
+
+define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
+; RV64I-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 1048560
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addi a2, a2, 15
+; RV64I-NEXT:    addw a2, a0, a2
+; RV64I-NEXT:    addiw a3, a3, -16
+; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    bltu a3, a0, .LBB75_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:  .LBB75_2:
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sub_if_uge_C_multiuse_sub_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a2, 1048560
+; RV64ZBB-NEXT:    addi a2, a2, 15
+; RV64ZBB-NEXT:    addw a2, a0, a2
+; RV64ZBB-NEXT:    minu a0, a2, a0
+; RV64ZBB-NEXT:    sw a2, 0(a1)
+; RV64ZBB-NEXT:    ret
+  %sub = add i32 %x, -65521
+  store i32 %sub, ptr %z, align 4
+  %cmp = icmp ugt i32 %x, 65520
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  ret i32 %cond
+}

pfusik · 2025-04-10T15:00:53Z

llvm/test/CodeGen/RISCV/rv32zbb.ll

+; RV32ZBB-LABEL: sub_if_uge_C_i8:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    addi a1, a0, -13
+; RV32ZBB-NEXT:    zext.b a1, a1


zext.b is redundant here. How to get rid of it?

I think you'll probably need a new DAGCombine for it.

Let's address this extra optimization opportunity for i8 and i16 in a later change.

pfusik · 2025-04-10T15:01:03Z

llvm/test/CodeGen/RISCV/rv32zbb.ll

+; RV32ZBB-LABEL: sub_if_uge_C_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    addi a1, a0, -251
+; RV32ZBB-NEXT:    zext.h a1, a1


github-actions · 2025-04-10T15:02:14Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

topperc

LGTM

lukel97

LGTM

Like llvm#134235, but with a constant.
It's a pattern in Adler-32 checksum calculation in zlib.

Example:

unsigned adler32_mod(unsigned x) {
  return x >= 65521u ? x - 65521u : x;
}

Before, on RISC-V:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
negw    a1, a1
addi    a2, a2, 15
and     a1, a1, a2
addw    a0, a0, a1

Or, with Zicond:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
addi    a2, a2, 15
czero.eqz  a1, a2, a1
addw    a0, a0, a1

After, with Zbb:

lui     a1, 1048560
addi    a1, a1, 15
addw    a1, a0, a1
minu    a0, a1, a0

pfusik · 2025-04-11T11:42:33Z

Tests merged into main branch as b46f16c. PR rebased.

llvm-ci · 2025-04-11T13:23:28Z

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/10310

Here is the relevant piece of the build log for the reference

Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp   -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************

llvm#135194)

Like llvm#134235, but with a constant.
It's a pattern in Adler-32 checksum calculation in zlib.

Example:

unsigned adler32_mod(unsigned x) {
  return x >= 65521u ? x - 65521u : x;
}

Before, on RISC-V:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
negw    a1, a1
addi    a2, a2, 15
and     a1, a1, a2
addw    a0, a0, a1

Or, with Zicond:

lui     a1, 16
lui     a2, 1048560
addiw   a1, a1, -16
sltu    a1, a1, a0
addi    a2, a2, 15
czero.eqz  a1, a2, a1
addw    a0, a0, a1

After, with Zbb:

lui     a1, 1048560
addi    a1, a1, 15
addw    a1, a0, a1
minu    a0, a1, a0

mikaelholmen · 2025-05-15T08:33:00Z

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -12074,6 +12078,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {

    if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
      return NewSel;
+
+    // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)


@pfusik: What if the add results in poison?
`select` blocks poison coming from its unselected operand, but `umin` doesn't(?). We've seen a downstream miscompile that we think is caused by this.

Do you have a reproducer?
If it's add nuw/nsw, I think the solution would be to drop nuw/nsw during this transform.

I think this shows the problem: https://alive2.llvm.org/ce/z/fmKmiZ

One either needs to freeze the UMIN operands, or make sure that N1 and N2 are guaranteed not to be poison when doing this transform. Maybe it is enough to freeze the ADD operand (as the comparison would be poison if x is poison)?

Thanks!

Maybe it is enough to freeze the ADD operand (as the comparison would be poison if x is poison)?

I'm new to alive2, but I think it shows it won't work. The transform relies on an unsigned overflow.

I'll prepare a patch recreating the add with no nuw/nsw. alive2 says it's ok.

…nsform This fixes llvm#135194 incorrectly leaving `add nuw/nsw` while an unsigned wrap is expected.

…compile This fixes llvm#135194 incorrectly reusing the existing `add nuw/nsw` while the transformed code relies on an unsigned wrap.

…mpile (#140042) This fixes #135194 incorrectly reusing the existing `add nuw/nsw` while the transformed code relies on an unsigned wrap.

llvmbot added the llvm:SelectionDAG (SelectionDAGISel as well) label Apr 10, 2025

pfusik requested a review from lukel97 April 10, 2025 15:00

pfusik commented Apr 10, 2025

View reviewed changes

topperc reviewed Apr 10, 2025

View reviewed changes

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Show resolved Hide resolved

topperc approved these changes Apr 10, 2025

View reviewed changes

lukel97 approved these changes Apr 11, 2025

View reviewed changes

pfusik added 4 commits April 11, 2025 13:41

[DAGCombiner][NFC] Refactor creating hasUMin

e6f6d07

[DAGCombiner] clang-format

d92a09b

[RISCV] Handle the pattern with swapped select operands

ee43e66

pfusik force-pushed the sub-minu-c branch from 41d5f01 to ee43e66 Compare April 11, 2025 11:41

pfusik merged commit e100d2b into llvm:main Apr 11, 2025
11 checks passed

mikaelholmen reviewed May 15, 2025

View reviewed changes

pfusik added a commit to pfusik/llvm-project that referenced this pull request May 15, 2025

[DAGCombiner] Fix the "subtraction if above a constant threshold" tra…

3748ec1

…nsform This fixes llvm#135194 incorrectly leaving `add nuw/nsw` while an unsigned wrap is expected.

pfusik mentioned this pull request May 15, 2025

[DAGCombiner] Fix the "subtraction if above a constant threshold" miscompile #140042

Merged

pfusik added a commit that referenced this pull request May 17, 2025

[DAGCombiner] Fix a "subtraction if above a constant threshold" misco…

9e22f96

…mpile (#140042) This fixes #135194 incorrectly reusing the existing `add nuw/nsw` while the transformed code relies on an unsigned wrap.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[DAGCombiner] Fold subtraction if above a constant threshold to `umin` #135194

[DAGCombiner] Fold subtraction if above a constant threshold to `umin` #135194

[DAGCombiner] Fold subtraction if above a constant threshold to umin #135194

[DAGCombiner] Fold subtraction if above a constant threshold to umin #135194

Conversation

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

[DAGCombiner] Fold subtraction if above a constant threshold to `umin` #135194

[DAGCombiner] Fold subtraction if above a constant threshold to `umin` #135194