[AMDGPU] Handle unset/max flat workgroup size in waves/EU · llvm/llvm-project@c40eaae · GitHub
[go: up one dir, main page]

Skip to content

Commit c40eaae

Browse files
committed
[AMDGPU] Handle unset/max flat workgroup size in waves/EU
When `amdgpu-flat-work-group-size` is either missing or set to the maximum allowed range [1, 1024], the attributor won't change the state, which later results in a `FlatWorkGroupSize != 0` assertion failure.
1 parent 76dba2e commit c40eaae

File tree

2 files changed

+43
-2
lines changed

2 files changed

+43
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,13 +1170,19 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11701170
!AssumedGroupSize->isValidState())
11711171
return false;
11721172

1173+
unsigned MinFWGSize =
1174+
AssumedGroupSize->getAssumed().getLower().getZExtValue();
1175+
unsigned MaxFWGSize =
1176+
AssumedGroupSize->getAssumed().getUpper().getZExtValue();
1177+
if (MinFWGSize == 0 && MaxFWGSize == 0)
1178+
std::tie(MinFWGSize, MaxFWGSize) =
1179+
InfoCache.getDefaultFlatWorkGroupSize(*Func);
11731180
unsigned Min, Max;
11741181
std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
11751182
*Caller,
11761183
{CallerInfo->getAssumed().getLower().getZExtValue(),
11771184
CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
1178-
{AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1179-
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
1185+
{MinFWGSize, MaxFWGSize - 1});
11801186
ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
11811187
IntegerRangeState CallerRangeState(CallerRange);
11821188
Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 -passes=amdgpu-attributor %s | FileCheck %s
2+
3+
; CHECK-LABEL: define internal fastcc void @call1(
4+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]]
5+
define internal fastcc void @call1() #0 {
6+
tail call fastcc void @call2()
7+
ret void
8+
}
9+
10+
; CHECK-LABEL: define internal fastcc void @call2(
11+
; CHECK-SAME: ) #[[ATTR0]]
12+
define internal fastcc void @call2() #1 {
13+
tail call fastcc void @call5()
14+
ret void
15+
}
16+
17+
; CHECK-LABEL: define { ptr addrspace(1), ptr } @call3(
18+
; CHECK-SAME:) #[[ATTR0]]
19+
define { ptr addrspace(1), ptr } @call3() #2 {
20+
tail call fastcc void @call5()
21+
ret { ptr addrspace(1), ptr } zeroinitializer
22+
}
23+
24+
; CHECK-LABEL: define internal fastcc void @call5(
25+
; CHECK-SAME: ) #[[ATTR0]]
26+
define internal fastcc void @call5() {
27+
tail call fastcc void @call1()
28+
ret void
29+
}
30+
31+
attributes #0 = {"amdgpu-flat-work-group-size"="1, 1024" "target-cpu"="gfx942" }
32+
attributes #1 = {"amdgpu-flat-work-group-size"="1, 1024" "target-cpu"="gfx942" }
33+
attributes #2 = {"amdgpu-flat-work-group-size"="1, 256" "target-cpu"="gfx942" }
34+
35+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx942" "uniform-work-group-size"="false" }

0 commit comments

Comments
 (0)
0