8000 [VectorCombine][AMDGPU] Narrow Phi of Shuffles. by PeddleSpam · Pull Request #140188 · llvm/llvm-project · GitHub
[go: up one dir, main page]

Skip to content

[VectorCombine][AMDGPU] Narrow Phi of Shuffles. #140188

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

PeddleSpam
Copy link
Contributor
@PeddleSpam PeddleSpam commented May 16, 2025

Attempt to narrow a phi of shufflevector instructions where the two incoming values have the same operands but different masks.

Related to #128938.

@llvmbot
Copy link
Member
llvmbot commented May 16, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Leon Clark (PeddleSpam)

Changes

Attempt to narrow a phi of shufflevector instructions where the two incoming values have the same operands but different masks.


Patch is 60.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140188.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+107)
  • (added) llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll (+1266)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index c7d221e8d1e5c..fb052998c8790 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -131,6 +131,7 @@ class VectorCombine {
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
   bool foldInterleaveIntrinsics(Instruction &I);
   bool shrinkType(Instruction &I);
+  bool shrinkPhiOfShuffles(Instruction &I);
 
   void replaceValue(Value &Old, Value &New) {
     LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
@@ -3483,6 +3484,109 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
   return true;
 }
 
+// Attempt to narrow a phi of shufflevector instructions where the two incoming
+// values have the same operands but different masks. If the two shuffle masks
+// are offsets of one another we can use one branch to rotate the incoming
+// vector and perform one larger shuffle after the phi.
+bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
+  auto *Phi = dyn_cast<PHINode>(&I);
+  if (!Phi || Phi->getNumIncomingValues() != 2u)
+    return false;
+
+  auto *Shuf0 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(0u));
+  auto *Shuf1 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(1u));
+  if (!Shuf0 || !Shuf1)
+    return false;
+
+  if (!Shuf0->hasOneUse() && !Shuf1->hasOneUse())
+    return false;
+
+  auto *Shuf0Op0 = Shuf0->getOperand(0u);
+  auto *Shuf0Op1 = Shuf0->getOperand(1u);
+  auto *Shuf1Op0 = Shuf1->getOperand(0u);
+  auto *Shuf1Op1 = Shuf1->getOperand(1u);
+
+  auto IsPoison = [](Value *Val) -> bool {
+    return isa<PoisonValue>(Val) || isa<UndefValue>(Val);
+  };
+
+  if (Shuf0Op0 != Shuf1Op0 || !IsPoison(Shuf0Op1) || !IsPoison(Shuf1Op1))
+    return false;
+
+  // Ensure result vectors are wider than the argument vector.
+  auto *InputVT = cast<FixedVectorType>(Shuf0Op0->getType());
+  auto *ResultVT = cast<FixedVectorType>(Shuf0->getType());
+  auto const InputNumElements = InputVT->getNumElements();
+
+  if (InputNumElements >= ResultVT->getNumElements())
+    return false;
+
+  // Take the difference of the two shuffle masks at each index. Ignore poison
+  // values at the same index in both masks.
+  auto Mask0 = Shuf0->getShuffleMask();
+  auto Mask1 = Shuf1->getShuffleMask();
+  auto NewMask0 = std::vector<int>();
+  NewMask0.reserve(Mask0.size());
+
+  for (auto I = 0u; I < Mask0.size(); ++I) {
+    if (Mask0[I] >= 0 && Mask1[I] >= 0)
+      NewMask0.push_back(Mask0[I] - Mask1[I]);
+    else if (Mask0[I] == -1 && Mask1[I] == -1)
+      continue;
+    else
+      return false;
+  }
+
+  if (NewMask0.empty() ||
+      !std::equal(NewMask0.begin() + 1u, NewMask0.end(), NewMask0.begin()))
+    return false;
+
+  // Create new mask using difference of the two incoming masks.
+  auto MaskOffset = NewMask0[0u];
+  if (!Shuf0->hasOneUse()) {
+    std::swap(Shuf0, Shuf1);
+    std::swap(Mask0, Mask1);
+    MaskOffset *= -1;
+  }
+
+  auto Index = (InputNumElements - MaskOffset) % InputNumElements;
+  NewMask0.clear();
+
+  for (auto I = 0u; I < InputNumElements; ++I) {
+    NewMask0.push_back(Index);
+    Index = (Index + 1u) % InputNumElements;
+  }
+
+  // Calculate costs for worst cases and compare.
+  auto const Kind = TTI::SK_PermuteSingleSrc;
+  auto OldCost = std::max(TTI.getShuffleCost(Kind, InputVT, Mask0, CostKind),
+                          TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind));
+  auto NewCost = TTI.getShuffleCost(Kind, InputVT, NewMask0, CostKind) +
+                 TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind);
+
+  if (NewCost > OldCost)
+    return false;
+
+  // Create new shuffles and narrowed phi.
+  auto Builder = IRBuilder(&I);
+  Builder.SetInsertPoint(Shuf0);
+  Builder.SetCurrentDebugLocation(Shuf0->getDebugLoc());
+  auto *NewShuf0 = Builder.CreateShuffleVector(Shuf0Op0, Shuf0Op1, NewMask0);
+
+  Builder.SetInsertPoint(Phi);
+  Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
+  NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
+  NewPhi->addIncoming(Shuf1Op0, Phi->getIncomingBlock(1u));
+
+  Builder.SetInsertPoint(*NewPhi->getInsertionPointAfterDef());
+  auto *NewPoison = PoisonValue::get(NewPhi->getType());
+  auto *NewShuf2 = Builder.CreateShuffleVector(NewPhi, NewPoison, Mask1);
+
+  replaceValue(*Phi, *NewShuf2);
+  return true;
+}
+
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
 bool VectorCombine::run() {
@@ -3561,6 +3665,9 @@ bool VectorCombine::run() {
       case Instruction::BitCast:
         MadeChange |= foldBitcastShuffle(I);
         break;
+      case Instruction::PHI:
+        MadeChange |= shrinkPhiOfShuffles(I);
+        break;
       default:
         MadeChange |= shrinkType(I);
         break;
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
new file mode 100644
index 0000000000000..9b4c4fc9a702d
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
@@ -0,0 +1,1266 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=CHECK
+
+define <2 x i8> @shuffle_v2i8(<2 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x i8> @shuffle_v2i8(
+; CHECK-SAME: <2 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x i8> [[ARG0]], <2 x i8> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x i8> [[ARG0]], <2 x i8> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x i8> %arg0, <2 x i8> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x i8> %arg0, <2 x i8> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x i8> %val3
+}
+
+define <4 x i8> @shuffle_v4i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x i8> @shuffle_v4i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x i8> %val3
+}
+
+define <8 x i8> @shuffle_v8i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x i8> @shuffle_v8i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i8> %val3
+}
+
+define <16 x i8> @shuffle_v16i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x i8> @shuffle_v16i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x i8> %val3
+}
+
+define <32 x i8> @shuffle_v32i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x i8> @shuffle_v32i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <32 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <32 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <32 x i8> %val3
+}
+
+define <2 x i16> @shuffle_v2i16(<2 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x i16> @shuffle_v2i16(
+; CHECK-SAME: <2 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x i16> [[ARG0]], <2 x i16> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x i16> [[ARG0]], <2 x i16> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x i16> %arg0, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x i16> %arg0, <2 x i16> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x i16> %val3
+}
+
+define <4 x i16> @shuffle_v4i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x i16> @shuffle_v4i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x i16> %val3
+}
+
+define <8 x i16> @shuffle_v8i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x i16> @shuffle_v8i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i16> %val3
+}
+
+define <16 x i16> @shuffle_v16i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x i16> @shuffle_v16i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x i16> %val3
+}
+
+define <32 x i16> @shuffle_v32i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x i16> @shuffle_v32i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]...
[truncated]

@PeddleSpam PeddleSpam requested review from RKSimon and arsenm May 16, 2025 05:02
@RKSimon RKSimon requested review from davemgreen and dtcxzyw May 16, 2025 06:00
Comment on lines +3504 to +3507
auto *Shuf0Op0 = Shuf0->getOperand(0u);
auto *Shuf0Op1 = Shuf0->getOperand(1u);
auto *Shuf1Op0 = Shuf1->getOperand(0u);
auto *Shuf1Op1 = Shuf1->getOperand(1u);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no auto

Comment on lines +3509 to +3511
auto IsPoison = [](Value *Val) -> bool {
return isa<PoisonValue>(Val) || isa<UndefValue>(Val);
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove this, it's actively misleading. It's equivalent to the isa<UndefValue>

// values at the same index in both masks.
auto Mask0 = Shuf0->getShuffleMask();
auto Mask1 = Shuf1->getShuffleMask();
auto NewMask0 = std::vector<int>();
8000 Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to use SmallVector<int, 16>


// Take the difference of the two shuffle masks at each index. Ignore poison
// values at the same index in both masks.
auto Mask0 = Shuf0->getShuffleMask();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

auto -> ArrayRef

return false;

if (!Shuf0->hasOneUse() && !Shuf1->hasOneUse())
return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can almost certainly make this clearer with PatternMatch

return false;
}

if (NewMask0.empty() ||
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More comments

auto Index = (InputNumElements - MaskOffset) % InputNumElements;
NewMask0.clear();

for (auto I = 0u; I < InputNumElements; ++I) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(style) don't use auto

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants
0