Bump triton pin. Add aarch64 triton build (#148705) · pytorch/pytorch@3945954 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3945954

Browse files
atalman and pytorchmergebot
authored and committed
Bump triton pin. Add aarch64 triton build (#148705)
1. Bumps pin for triton to release/3.3.x branch
2. Bump pin for triton-xpu
3. Remove ROCm xfail tests
4. Add aarch64 triton build:
   * Depends on: #148768
   * Fixes: #130558

Pull Request resolved: #148705
Approved by: https://github.com/drisspg, https://github.com/Skylion007, https://github.com/EikanWang
1 parent c983e11 commit 3945954

File tree

6 files changed

+20
-11
lines changed

6 files changed

+20
-11
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
cfb7d5314748542fed42d0837bef1a6d177da2d6
1+
83111ab22be6e4a588d184ac45175986a7dde9fc

.ci/docker/ci_commit_pins/triton.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
4b3bb1f8da0ded6ccd572dd1358ef45af5a1befe
1+
ab727c406a3805fe0acfb63a18ead299ff7cf05c

.ci/docker/triton_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.2.0
1+
3.3.0

.github/scripts/build_triton_wheel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def main() -> None:
123123
parser = ArgumentParser("Build Triton binaries")
124124
parser.add_argument("--release", action="store_true")
125125
parser.add_argument(
126-
"--device", type=str, default="cuda", choices=["cuda", "rocm", "xpu"]
126+
"--device", type=str, default="cuda", choices=["cuda", "rocm", "xpu", "aarch64"]
127127
)
128128
parser.add_argument("--py-version", type=str)
129129
parser.add_argument("--commit-hash", type=str)

.github/workflows/build-triton-wheel.yml

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,29 @@ jobs:
4040
build-wheel:
4141
name: "Build Triton Wheel"
4242
needs: get-label-type
43-
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
43+
runs-on: ${{ matrix.runs_on }}
4444
strategy:
4545
fail-fast: false
4646
matrix:
4747
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t" ]
48-
device: ["cuda", "rocm", "xpu"]
48+
device: ["cuda", "rocm", "xpu", "aarch64"]
4949
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
5050
include:
5151
- device: "rocm"
5252
rocm_version: "6.3"
53+
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
5354
- device: "cuda"
5455
rocm_version: ""
56+
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
57+
- device: "xpu"
58+
rocm_version: ""
59+
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
60+
- device: "aarch64"
61+
rocm_version: ""
62+
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge"
5563
timeout-minutes: 40
5664
env:
57-
DOCKER_IMAGE: ${{ matrix.device == 'rocm' && format('pytorch/manylinux2_28-builder:rocm{0}', matrix.rocm_version) || matrix.docker-image }}
65+
DOCKER_IMAGE: ${{ matrix.device == 'rocm' && format('pytorch/manylinux2_28-builder:rocm{0}', matrix.rocm_version) || matrix.device == 'aarch64' && 'pytorch/manylinux2_28_aarch64-builder:cpu-aarch64' || matrix.docker-image }}
5866
PY_VERS: ${{ matrix.py_vers }}
5967
BUILD_DEVICE: ${{ matrix.device }}
6068
PLATFORM: 'manylinux_2_28_x86_64'
@@ -125,18 +133,21 @@ jobs:
125133
fi
126134
127135
docker exec -t "${container_name}" yum install -y zlib-devel zip
128-
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -m pip install -U setuptools==67.4.0 pybind11==2.13.1 auditwheel
129-
if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "rocm") ]]; then
136+
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -m pip install -U setuptools==67.4.0 pybind11==2.13.1 auditwheel wheel
137+
138+
if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "rocm" || "${{ matrix.device }}" == "aarch64" ) ]]; then
130139
# With this install, it gets clang 16.0.6.
131140
docker exec -t "${container_name}" dnf install clang lld -y
132141
WITH_CLANG_LDD="--with-clang-ldd"
133142
fi
143+
134144
if [[ "${BUILD_DEVICE}" == xpu ]]; then
135145
docker exec -t "${container_name}" bash -c "dnf install -y gcc-toolset-13-gcc-c++"
136146
docker exec -t "${container_name}" bash -c "source /opt/rh/gcc-toolset-13/enable && ${PYTHON_EXECUTABLE} /pytorch/.github/scripts/build_triton_wheel.py --device=$BUILD_DEVICE $RELEASE"
137147
else
138148
docker exec -t "${container_name}" bash -c "${PYTHON_EXECUTABLE} /pytorch/.github/scripts/build_triton_wheel.py --device=$BUILD_DEVICE $RELEASE $WITH_CLANG_LDD"
139149
fi
150+
140151
if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "xpu") ]]; then
141152
docker exec -t "${container_name}" bash -c "auditwheel repair --plat ${PLATFORM} //artifacts/*.whl"
142153
else

test/inductor/test_torchinductor_codegen_dynamic_shapes.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,9 +386,7 @@ def run(*ex, **kwargs):
386386
if TEST_WITH_ROCM:
387387
test_failures.update(
388388
{
389-
"test_split_cumsum_dynamic_shapes": TestFailure(("cpu", "cuda")),
390389
"test_split_cumsum_low_prec_dynamic_shapes": TestFailure(("cpu", "cuda")),
391-
"test_split_cumprod_dynamic_shapes": TestFailure(("cpu", "cuda")),
392390
"test_split_cumprod_low_prec_dynamic_shapes": TestFailure(("cpu", "cuda")),
393391
}
394392
)

0 commit comments

Comments
 (0)
0