[VectorCombine][AMDGPU] Narrow Phi of Shuffles. #140188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Leon Clark (PeddleSpam) ChangesAttempt to narrow a phi of shufflevector instructions where the two incoming values have the same operands but different masks. Patch is 60.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140188.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index c7d221e8d1e5c..fb052998c8790 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -131,6 +131,7 @@ class VectorCombine {
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
bool foldInterleaveIntrinsics(Instruction &I);
bool shrinkType(Instruction &I);
+ bool shrinkPhiOfShuffles(Instruction &I);
void replaceValue(Value &Old, Value &New) {
LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
@@ -3483,6 +3484,109 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
return true;
}
+// Attempt to narrow a phi of shufflevector instructions where the two incoming
+// values have the same operands but different masks. If the two shuffle masks
+// are offsets of one another we can use one branch to rotate the incoming
+// vector and perform one larger shuffle after the phi.
+bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
+ auto *Phi = dyn_cast<PHINode>(&I);
+ if (!Phi || Phi->getNumIncomingValues() != 2u)
+ return false;
+
+ auto *Shuf0 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(0u));
+ auto *Shuf1 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(1u));
+ if (!Shuf0 || !Shuf1)
+ return false;
+
+ if (!Shuf0->hasOneUse() && !Shuf1->hasOneUse())
+ return false;
+
+ auto *Shuf0Op0 = Shuf0->getOperand(0u);
+ auto *Shuf0Op1 = Shuf0->getOperand(1u);
+ auto *Shuf1Op0 = Shuf1->getOperand(0u);
+ auto *Shuf1Op1 = Shuf1->getOperand(1u);
+
+ auto IsPoison = [](Value *Val) -> bool {
+ return isa<PoisonValue>(Val) || isa<UndefValue>(Val);
+ };
+
+ if (Shuf0Op0 != Shuf1Op0 || !IsPoison(Shuf0Op1) || !IsPoison(Shuf1Op1))
+ return false;
+
+ // Ensure result vectors are wider than the argument vector.
+ auto *InputVT = cast<FixedVectorType>(Shuf0Op0->getType());
+ auto *ResultVT = cast<FixedVectorType>(Shuf0->getType());
+ auto const InputNumElements = InputVT->getNumElements();
+
+ if (InputNumElements >= ResultVT->getNumElements())
+ return false;
+
+ // Take the difference of the two shuffle masks at each index. Ignore poison
+ // values at the same index in both masks.
+ auto Mask0 = Shuf0->getShuffleMask();
+ auto Mask1 = Shuf1->getShuffleMask();
+ auto NewMask0 = std::vector<int>();
+ NewMask0.reserve(Mask0.size());
+
+ for (auto I = 0u; I < Mask0.size(); ++I) {
+ if (Mask0[I] >= 0 && Mask1[I] >= 0)
+ NewMask0.push_back(Mask0[I] - Mask1[I]);
+ else if (Mask0[I] == -1 && Mask1[I] == -1)
+ continue;
+ else
+ return false;
+ }
+
+ if (NewMask0.empty() ||
+ !std::equal(NewMask0.begin() + 1u, NewMask0.end(), NewMask0.begin()))
+ return false;
+
+ // Create new mask using difference of the two incoming masks.
+ auto MaskOffset = NewMask0[0u];
+ if (!Shuf0->hasOneUse()) {
+ std::swap(Shuf0, Shuf1);
+ std::swap(Mask0, Mask1);
+ MaskOffset *= -1;
+ }
+
+ auto Index = (InputNumElements - MaskOffset) % InputNumElements;
+ NewMask0.clear();
+
+ for (auto I = 0u; I < InputNumElements; ++I) {
+ NewMask0.push_back(Index);
+ Index = (Index + 1u) % InputNumElements;
+ }
+
+ // Calculate costs for worst cases and compare.
+ auto const Kind = TTI::SK_PermuteSingleSrc;
+ auto OldCost = std::max(TTI.getShuffleCost(Kind, InputVT, Mask0, CostKind),
+ TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind));
+ auto NewCost = TTI.getShuffleCost(Kind, InputVT, NewMask0, CostKind) +
+ TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind);
+
+ if (NewCost > OldCost)
+ return false;
+
+ // Create new shuffles and narrowed phi.
+ auto Builder = IRBuilder(&I);
+ Builder.SetInsertPoint(Shuf0);
+ Builder.SetCurrentDebugLocation(Shuf0->getDebugLoc());
+ auto *NewShuf0 = Builder.CreateShuffleVector(Shuf0Op0, Shuf0Op1, NewMask0);
+
+ Builder.SetInsertPoint(Phi);
+ Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+ auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
+ NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
+ NewPhi->addIncoming(Shuf1Op0, Phi->getIncomingBlock(1u));
+
+ Builder.SetInsertPoint(*NewPhi->getInsertionPointAfterDef());
+ auto *NewPoison = PoisonValue::get(NewPhi->getType());
+ auto *NewShuf2 = Builder.CreateShuffleVector(NewPhi, NewPoison, Mask1);
+
+ replaceValue(*Phi, *NewShuf2);
+ return true;
+}
+
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
@@ -3561,6 +3665,9 @@ bool VectorCombine::run() {
case Instruction::BitCast:
MadeChange |= foldBitcastShuffle(I);
break;
+ case Instruction::PHI:
+ MadeChange |= shrinkPhiOfShuffles(I);
+ break;
default:
MadeChange |= shrinkType(I);
break;
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
new file mode 100644
index 0000000000000..9b4c4fc9a702d
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
@@ -0,0 +1,1266 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=CHECK
+
+define <2 x i8> @shuffle_v2i8(<2 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x i8> @shuffle_v2i8(
+; CHECK-SAME: <2 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x i8> [[ARG0]], <2 x i8> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x i8> [[ARG0]], <2 x i8> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <2 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <2 x i8> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <2 x i8> %arg0, <2 x i8> poison, <2 x i32> <i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <2 x i8> %arg0, <2 x i8> poison, <2 x i32> <i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <2 x i8> [ %val1, %then ], [ %val2, %else ]
+ ret <2 x i8> %val3
+}
+
+define <4 x i8> @shuffle_v4i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x i8> @shuffle_v4i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <4 x i8> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <4 x i8> [ %val1, %then ], [ %val2, %else ]
+ ret <4 x i8> %val3
+}
+
+define <8 x i8> @shuffle_v8i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x i8> @shuffle_v8i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <8 x i8> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <8 x i8> [ %val1, %then ], [ %val2, %else ]
+ ret <8 x i8> %val3
+}
+
+define <16 x i8> @shuffle_v16i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x i8> @shuffle_v16i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <16 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <16 x i8> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <16 x i8> [ %val1, %then ], [ %val2, %else ]
+ ret <16 x i8> %val3
+}
+
+define <32 x i8> @shuffle_v32i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x i8> @shuffle_v32i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <32 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <32 x i8> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <32 x i8> [ %val1, %then ], [ %val2, %else ]
+ ret <32 x i8> %val3
+}
+
+define <2 x i16> @shuffle_v2i16(<2 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x i16> @shuffle_v2i16(
+; CHECK-SAME: <2 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x i16> [[ARG0]], <2 x i16> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x i16> [[ARG0]], <2 x i16> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <2 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <2 x i16> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <2 x i16> %arg0, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <2 x i16> %arg0, <2 x i16> poison, <2 x i32> <i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <2 x i16> [ %val1, %then ], [ %val2, %else ]
+ ret <2 x i16> %val3
+}
+
+define <4 x i16> @shuffle_v4i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x i16> @shuffle_v4i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <4 x i16> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <4 x i16> [ %val1, %then ], [ %val2, %else ]
+ ret <4 x i16> %val3
+}
+
+define <8 x i16> @shuffle_v8i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x i16> @shuffle_v8i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <8 x i16> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <8 x i16> [ %val1, %then ], [ %val2, %else ]
+ ret <8 x i16> %val3
+}
+
+define <16 x i16> @shuffle_v16i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x i16> @shuffle_v16i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <16 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <16 x i16> [[VAL3]]
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ tail call void @func0()
+ br label %finally
+
+else:
+ %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ tail call void @func1()
+ br label %finally
+
+finally:
+ %val3 = phi <16 x i16> [ %val1, %then ], [ %val2, %else ]
+ ret <16 x i16> %val3
+}
+
+define <32 x i16> @shuffle_v32i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x i16> @shuffle_v32i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: tail call void @func0()
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: tail call void @func1()
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]...
[truncated]
|
auto *Shuf0Op0 = Shuf0->getOperand(0u); | ||
auto *Shuf0Op1 = Shuf0->getOperand(1u); | ||
auto *Shuf1Op0 = Shuf1->getOperand(0u); | ||
auto *Shuf1Op1 = Shuf1->getOperand(1u); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no auto
auto IsPoison = [](Value *Val) -> bool { | ||
return isa<PoisonValue>(Val) || isa<UndefValue>(Val); | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove this, it's actively misleading. It's equivalent to the isa<UndefValue>
// values at the same index in both masks. | ||
auto Mask0 = Shuf0->getShuffleMask(); | ||
auto Mask1 = Shuf1->getShuffleMask(); | ||
auto NewMask0 = std::vector<int>(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to use SmallVector<int, 16>
|
||
// Take the difference of the two shuffle masks at each index. Ignore poison | ||
// values at the same index in both masks. | ||
auto Mask0 = Shuf0->getShuffleMask(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto -> ArrayRef
return false; | ||
|
||
if (!Shuf0->hasOneUse() && !Shuf1->hasOneUse()) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you can almost certainly make this clearer with PatternMatch
return false; | ||
} | ||
|
||
if (NewMask0.empty() || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More comments
auto Index = (InputNumElements - MaskOffset) % InputNumElements; | ||
NewMask0.clear(); | ||
|
||
for (auto I = 0u; I < InputNumElements; ++I) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) don't use auto
Attempt to narrow a phi of shufflevector instructions where the two incoming values have the same operands but different masks.
Related to #128938.