[CI][CUDA] Move away from cuda12.4, Add cuda12.6 eager CI tests by tinglvv · Pull Request #148602 · pytorch/pytorch · GitHub
[go: up one dir, main page]

Skip to content

[CI][CUDA] Move away from cuda12.4, Add cuda12.6 eager CI tests #148602

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ fi
# configuration, so we hardcode everything here rather than do it
# from scratch
case "$image" in
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
CUDA_VERSION=12.4.1
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9)
CUDA_VERSION=12.6.3
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
Expand Down Expand Up @@ -326,15 +326,15 @@ case "$image" in
EXECUTORCH=yes
;;
pytorch-linux-jammy-py3.12-halide)
CUDA_VERSION=12.4
CUDA_VERSION=12.6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Halide is part of inductor, so not sure why you are modifying it here.

ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
CONDA_CMAKE=yes
HALIDE=yes
TRITON=yes
;;
pytorch-linux-jammy-py3.12-triton-cpu)
CUDA_VERSION=12.4
CUDA_VERSION=12.6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sounds like an oxymoron: why do CPU builds need CUDA (and what is this config for to begin with)?

ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
CONDA_CMAKE=yes
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
matrix:
runner: [linux.12xlarge]
docker-image-name: [
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9,
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9,
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks,
Expand Down
70 changes: 35 additions & 35 deletions .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}

linux-focal-cuda12_4-py3_10-gcc9-build:
name: linux-focal-cuda12.4-py3.10-gcc9
linux-focal-cuda12_6-py3_10-gcc9-build:
name: linux-focal-cuda12.6-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
Expand All @@ -67,16 +67,16 @@ jobs:
]}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-test:
name: linux-focal-cuda12.4-py3.10-gcc9
linux-focal-cuda12_6-py3_10-gcc9-test:
name: linux-focal-cuda12.6-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-build
- linux-focal-cuda12_6-py3_10-gcc9-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
build-environment: linux-focal-cuda12.6-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.test-matrix }}
secrets: inherit

linux-focal-cuda11_8-py3_9-gcc9-build:
Expand Down Expand Up @@ -170,16 +170,16 @@ jobs:
test-matrix: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.test-matrix }}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build:
name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build:
name: linux-focal-cuda12.6-py3.10-gcc9-experimental-split-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
if: false # See https://github.com/pytorch/pytorch/issues/138750
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
use_split_build: true
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
Expand All @@ -190,16 +190,16 @@ jobs:
]}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build-test:
name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build-test:
name: linux-focal-cuda12.6-py3.10-gcc9-experimental-split-build
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build
- linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
build-environment: linux-focal-cuda12.6-py3.10-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
secrets: inherit


Expand Down Expand Up @@ -265,14 +265,14 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
secrets: inherit

linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build:
name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build:
name: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
Expand All @@ -287,28 +287,28 @@ jobs:
]}
secrets: inherit

linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-test:
name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-test:
name: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build
- linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
docker-image: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
build-environment: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
docker-image: ${{ needs.linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
timeout-minutes: 300
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-bazel-test:
name: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
linux-focal-cuda12_6-py3_10-gcc9-bazel-test:
name: linux-focal-cuda12.6-py3.10-gcc9-bazel-test
uses: ./.github/workflows/_bazel-build-test.yml
needs: get-label-type
with:
runner: "${{ needs.get-label-type.outputs.label-type }}linux.large"
build-environment: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
cuda-version: "12.4"
build-environment: linux-focal-cuda12.6-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
cuda-version: "12.6"
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
Expand Down
52 changes: 26 additions & 26 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,14 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-build.outputs.test-matrix }}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-build:
name: linux-focal-cuda12.4-py3.10-gcc9
linux-focal-cuda12_6-py3_10-gcc9-build:
name: linux-focal-cuda12.6-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
Expand All @@ -296,17 +296,17 @@ jobs:
]}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-test:
name: linux-focal-cuda12.4-py3.10-gcc9
linux-focal-cuda12_6-py3_10-gcc9-test:
name: linux-focal-cuda12.6-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-build
- linux-focal-cuda12_6-py3_10-gcc9-build
- target-determination
with:
timeout-minutes: 360
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
build-environment: linux-focal-cuda12.6-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.test-matrix }}
secrets: inherit

linux-jammy-py3-clang12-mobile-build:
Expand Down Expand Up @@ -387,8 +387,8 @@ jobs:
needs: get-label-type
with:
runner: "${{ needs.get-label-type.outputs.label-type }}linux.large"
build-environment: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
cuda-version: cpu
test-matrix: |
{ include: [
Expand Down Expand Up @@ -430,14 +430,14 @@ jobs:
]}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-sm89-build:
name: linux-focal-cuda12.4-py3.10-gcc9-sm89
linux-focal-cuda12_6-py3_10-gcc9-sm89-build:
name: linux-focal-cuda12.6-py3.10-gcc9-sm89
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm89
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm89
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
cuda-arch-list: 8.9
test-matrix: |
{ include: [
Expand All @@ -449,16 +449,16 @@ jobs:
]}
secrets: inherit

unstable-linux-focal-cuda12_4-py3_10-gcc9-sm89-build-xfail:
unstable-linux-focal-cuda12_6-py3_10-gcc9-sm89-build-xfail:
# A version of the build that sets a larger number of jobs for a build. May
# OOM
name: unstable-linux-focal-cuda12.4-py3.10-gcc9-sm89-xfail
name: unstable-linux-focal-cuda12.6-py3.10-gcc9-sm89-xfail
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm89
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm89
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
cuda-arch-list: 8.9
max-jobs: 4
# Doesn't actually run tests, but need this in order to prevent the build
Expand All @@ -469,16 +469,16 @@ jobs:
]}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-sm89-test:
name: linux-focal-cuda12.4-py3.10-gcc9-sm89
linux-focal-cuda12_6-py3_10-gcc9-sm89-test:
name: linux-focal-cuda12.6-py3.10-gcc9-sm89
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-sm89-build
- linux-focal-cuda12_6-py3_10-gcc9-sm89-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm89
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm89-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm89-build.outputs.test-matrix }}
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm89
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-sm89-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-sm89-build.outputs.test-matrix }}
secrets: inherit

linux-jammy-py3-clang12-executorch-build:
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}

linux-focal-cuda12_4-py3_10-gcc9-sm86-build:
name: linux-focal-cuda12.4-py3.10-gcc9-sm86
linux-focal-cuda12_6-py3_10-gcc9-sm86-build:
name: linux-focal-cuda12.6-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
Expand All @@ -64,16 +64,16 @@ jobs:
]}
secrets: inherit

linux-focal-cuda12_4-py3_10-gcc9-sm86-test:
name: linux-focal-cuda12.4-py3.10-gcc9-sm86
linux-focal-cuda12_6-py3_10-gcc9-sm86-test:
name: linux-focal-cuda12.6-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-sm86-build
- linux-focal-cuda12_6-py3_10-gcc9-sm86-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm86-build.outputs.test-matrix }}
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-sm86-build.outputs.test-matrix }}
secrets: inherit

linux-focal-py3_9-clang10-build:
Expand Down
Loading
Loading
0