Update on "[Cutlass] Handle broadcasting in EVT python codegen" · pytorch/pytorch@ed34760 · GitHub
[go: up one dir, main page]

Skip to content

Commit ed34760

Browse files
committed
Update on "[Cutlass] Handle broadcasting in EVT python codegen"
Previously merged: * #151713 * #151405 * #150905 * #152306 * #152305 cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx ipiszy chenyang78 kadeng muchulee8 amjames chauhang aakhundov [ghstack-poisoned]
2 parents 573ad2a + 5cecc8a commit ed34760

File tree

92 files changed

+1682
-503
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+1682
-503
lines changed

.ci/docker/build.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,11 @@ case "$image" in
241241
CONDA_CMAKE=yes
242242
TRITON=yes
243243
;;
244-
pytorch-linux-jammy-rocm-n-1-py3)
244+
pytorch-linux-focal-rocm-n-1-py3)
245245
ANACONDA_PYTHON_VERSION=3.10
246246
GCC_VERSION=11
247247
VISION=yes
248-
ROCM_VERSION=6.3
248+
ROCM_VERSION=6.2.4
249249
NINJA_VERSION=1.9.0
250250
CONDA_CMAKE=yes
251251
TRITON=yes
@@ -254,11 +254,11 @@ case "$image" in
254254
UCC_COMMIT=${_UCC_COMMIT}
255255
INDUCTOR_BENCHMARKS=yes
256256
;;
257-
pytorch-linux-jammy-rocm-n-py3)
257+
pytorch-linux-focal-rocm-n-py3)
258258
ANACONDA_PYTHON_VERSION=3.10
259259
GCC_VERSION=11
260260
VISION=yes
261-
ROCM_VERSION=6.4
261+
ROCM_VERSION=6.3
262262
NINJA_VERSION=1.9.0
263263
CONDA_CMAKE=yes
264264
TRITON=yes

.ci/docker/common/install_rocm.sh

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,25 +66,17 @@ EOF
6666
done
6767

6868
# ROCm 6.3 had a regression where initializing static code objects had significant overhead
69-
# ROCm 6.4 did not yet fix the regression, also HIP branch names are different
70-
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]] || [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
71-
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
72-
HIP_BRANCH=rocm-6.3.x
73-
VER_STR=6.3
74-
elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
75-
HIP_BRANCH=release/rocm-rel-6.4
76-
VER_STR=6.4
77-
fi
69+
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
7870
# clr build needs CppHeaderParser but can only find it using conda's python
7971
/opt/conda/bin/python -m pip install CppHeaderParser
80-
git clone https://github.com/ROCm/HIP -b $HIP_BRANCH
72+
git clone https://github.com/ROCm/HIP -b rocm-6.3.x
8173
HIP_COMMON_DIR=$(readlink -f HIP)
82-
git clone https://github.com/jeffdaily/clr -b release/rocm-rel-${VER_STR}-statco-hotfix
74+
git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix
8375
mkdir -p clr/build
8476
pushd clr/build
8577
cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
8678
make -j
87-
cp hipamd/lib/libamdhip64.so.${VER_STR}.* /opt/rocm/lib/libamdhip64.so.${VER_STR}.*
79+
cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.*
8880
popd
8981
rm -rf HIP clr
9082
fi

.github/actions/setup-rocm/action.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,6 @@ description: Set up ROCm host for CI
55
runs:
66
using: composite
77
steps:
8-
- name: Runner ROCm version
9-
if: always()
10-
shell: bash
11-
run: |
12-
dpkg -l | grep -E " rocm"
13-
148
- name: Stop all running docker containers
159
if: always()
1610
shell: bash

.github/workflows/docker-builds.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ jobs:
6161
pytorch-linux-focal-py3.11-clang10,
6262
pytorch-linux-focal-py3.12-clang10,
6363
pytorch-linux-focal-py3.13-clang10,
64-
pytorch-linux-jammy-rocm-n-1-py3,
65-
pytorch-linux-jammy-rocm-n-py3,
64+
pytorch-linux-focal-rocm-n-1-py3,
65+
pytorch-linux-focal-rocm-n-py3,
6666
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-clang12,
6767
pytorch-linux-jammy-py3.9-gcc11,
6868
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks,

.github/workflows/docker-cache-mi300.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
id: calculate-docker-image
4242
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
4343
with:
44-
docker-image-name: pytorch-linux-jammy-rocm-n-py3
44+
docker-image-name: pytorch-linux-focal-rocm-n-py3
4545
push: false
4646

4747
- name: Pull docker image

.github/workflows/inductor-perf-test-nightly-rocm.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,13 @@ jobs:
7878
curr_branch: ${{ github.head_ref || github.ref_name }}
7979
curr_ref_type: ${{ github.ref_type }}
8080

81-
linux-jammy-rocm-py3_10-inductor-benchmark-build:
81+
linux-focal-rocm-py3_10-inductor-benchmark-build:
8282
if: github.repository_owner == 'pytorch'
8383
name: rocm-py3_10-inductor-benchmark-build
8484
uses: ./.github/workflows/_linux-build.yml
8585
with:
86-
build-environment: linux-jammy-rocm-py3_10
87-
docker-image-name: pytorch-linux-jammy-rocm-n-py3
86+
build-environment: linux-focal-rocm-py3_10
87+
docker-image-name: pytorch-linux-focal-rocm-n-py3
8888
test-matrix: |
8989
{ include: [
9090
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.2" },
@@ -102,18 +102,18 @@ jobs:
102102
]}
103103
secrets: inherit
104104

105-
linux-jammy-rocm-py3_10-inductor-benchmark-test:
105+
linux-focal-rocm-py3_10-inductor-benchmark-test:
106106
permissions:
107107
id-token: write
108108
contents: read
109109
name: rocm-py3_10-inductor-benchmark-test
110110
uses: ./.github/workflows/_rocm-test.yml
111-
needs: linux-jammy-rocm-py3_10-inductor-benchmark-build
111+
needs: linux-focal-rocm-py3_10-inductor-benchmark-build
112112
with:
113-
build-environment: linux-jammy-rocm-py3_10
113+
build-environment: linux-focal-rocm-py3_10
114114
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
115-
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.docker-image }}
116-
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.test-matrix }}
115+
docker-image: ${{ needs.linux-focal-rocm-py3_10-inductor-benchmark-build.outputs.docker-image }}
116+
test-matrix: ${{ needs.linux-focal-rocm-py3_10-inductor-benchmark-build.outputs.test-matrix }}
117117
timeout-minutes: 720
118118
# Disable monitor in perf tests for more investigation
119119
disable-monitor: true

.github/workflows/inductor-periodic.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,13 @@ jobs:
6767
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
6868
secrets: inherit
6969

70-
linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build:
70+
linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build:
7171
if: github.repository_owner == 'pytorch'
7272
name: rocm-py3_10-periodic-dynamo-benchmarks
7373
uses: ./.github/workflows/_linux-build.yml
7474
with:
75-
build-environment: linux-jammy-rocm-py3_10
76-
docker-image-name: pytorch-linux-jammy-rocm-n-py3
75+
build-environment: linux-focal-rocm-py3_10
76+
docker-image-name: pytorch-linux-focal-rocm-n-py3
7777
sync-tag: rocm-build
7878
test-matrix: |
7979
{ include: [
@@ -95,17 +95,17 @@ jobs:
9595
]}
9696
secrets: inherit
9797

98-
linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-test:
98+
linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-test:
9999
permissions:
100100
id-token: write
101101
contents: read
102102
name: rocm-py3_10-periodic-dynamo-benchmarks
103103
uses: ./.github/workflows/_rocm-test.yml
104-
needs: linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build
104+
needs: linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build
105105
with:
106-
build-environment: linux-jammy-rocm-py3_10
107-
docker-image: ${{ needs.linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.docker-image }}
108-
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
106+
build-environment: linux-focal-rocm-py3_10
107+
docker-image: ${{ needs.linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.docker-image }}
108+
test-matrix: ${{ needs.linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
109109
secrets: inherit
110110

111111
linux-focal-cuda12_6-py3_10-gcc9-inductor-build-gcp:

.github/workflows/inductor-rocm-mi300.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,30 +36,30 @@ jobs:
3636
curr_branch: ${{ github.head_ref || github.ref_name }}
3737
curr_ref_type: ${{ github.ref_type }}
3838

39-
linux-jammy-rocm-py3_10-inductor-build:
39+
linux-focal-rocm-py3_10-inductor-build:
4040
name: rocm-py3.10-inductor
4141
uses: ./.github/workflows/_linux-build.yml
4242
needs: get-label-type
4343
with:
4444
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
45-
build-environment: linux-jammy-rocm-py3.10
46-
docker-image-name: pytorch-linux-jammy-rocm-n-py3
45+
build-environment: linux-focal-rocm-py3.10
46+
docker-image-name: pytorch-linux-focal-rocm-n-py3
4747
test-matrix: |
4848
{ include: [
4949
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
5050
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
5151
]}
5252
secrets: inherit
5353

54-
linux-jammy-rocm-py3_10-inductor-test:
54+
linux-focal-rocm-py3_10-inductor-test:
5555
permissions:
5656
id-token: write
5757
contents: read
5858
name: rocm-py3.10-inductor
5959
uses: ./.github/workflows/_rocm-test.yml
60-
needs: linux-jammy-rocm-py3_10-inductor-build
60+
needs: linux-focal-rocm-py3_10-inductor-build
6161
with:
62-
build-environment: linux-jammy-rocm-py3.10
63-
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.docker-image }}
64-
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.test-matrix }}
62+
build-environment: linux-focal-rocm-py3.10
63+
docker-image: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.docker-image }}
64+
test-matrix: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.test-matrix }}
6565
secrets: inherit

.github/workflows/inductor-rocm.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,30 +29,30 @@ jobs:
2929
curr_branch: ${{ github.head_ref || github.ref_name }}
3030
curr_ref_type: ${{ github.ref_type }}
3131

32-
linux-jammy-rocm-py3_10-inductor-build:
32+
linux-focal-rocm-py3_10-inductor-build:
3333
name: rocm-py3.10-inductor
3434
uses: ./.github/workflows/_linux-build.yml
3535
needs: get-label-type
3636
with:
3737
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
38-
build-environment: linux-jammy-rocm-py3.10
39-
docker-image-name: pytorch-linux-jammy-rocm-n-py3
38+
build-environment: linux-focal-rocm-py3.10
39+
docker-image-name: pytorch-linux-focal-rocm-n-py3
4040
test-matrix: |
4141
{ include: [
4242
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.2" },
4343
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.2" },
4444
]}
4545
secrets: inherit
4646

47-
linux-jammy-rocm-py3_10-inductor-test:
47+
linux-focal-rocm-py3_10-inductor-test:
4848
permissions:
4949
id-token: write
5050
contents: read
5151
name: rocm-py3.10-inductor
5252
uses: ./.github/workflows/_rocm-test.yml
53-
needs: linux-jammy-rocm-py3_10-inductor-build
53+
needs: linux-focal-rocm-py3_10-inductor-build
5454
with:
55-
build-environment: linux-jammy-rocm-py3.10
56-
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.docker-image }}
57-
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.test-matrix }}
55+
build-environment: linux-focal-rocm-py3.10
56+
docker-image: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.docker-image }}
57+
test-matrix: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.test-matrix }}
5858
secrets: inherit

.github/workflows/periodic-rocm-mi300.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ jobs:
4949
curr_branch: ${{ github.head_ref || github.ref_name }}
5050
curr_ref_type: ${{ github.ref_type }}
5151

52-
linux-jammy-rocm-py3_10-build:
53-
name: linux-jammy-rocm-py3.10
52+
linux-focal-rocm-py3_10-build:
53+
name: linux-focal-rocm-py3.10
5454
uses: ./.github/workflows/_linux-build.yml
5555
needs: get-label-type
5656
with:
5757
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
58-
build-environment: linux-jammy-rocm-py3.10
59-
docker-image-name: pytorch-linux-jammy-rocm-n-py3
58+
build-environment: linux-focal-rocm-py3.10
59+
docker-image-name: pytorch-linux-focal-rocm-n-py3
6060
test-matrix: |
6161
{ include: [
6262
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
@@ -65,17 +65,17 @@ jobs:
6565
]}
6666
secrets: inherit
6767

68-
linux-jammy-rocm-py3_10-test:
68+
linux-focal-rocm-py3_10-test:
6969
permissions:
7070
id-token: write
7171
contents: read
72-
name: linux-jammy-rocm-py3.10
72+
name: linux-focal-rocm-py3.10
7373
uses: ./.github/workflows/_rocm-test.yml
7474
needs:
75-
- linux-jammy-rocm-py3_10-build
75+
- linux-focal-rocm-py3_10-build
7676
- target-determination
7777
with:
78-
build-environment: linux-jammy-rocm-py3.10
79-
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
80-
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
78+
build-environment: linux-focal-rocm-py3.10
79+
docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
80+
test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
8181
secrets: inherit

0 commit comments

Comments
 (0)
0