[CI][CUDA] Move away from cuda12.4, Add cuda12.6 eager CI tests (#148602) · pytorch/pytorch@a0bc6d8 · GitHub
[go: up one dir, main page]

Skip to content

Commit a0bc6d8

Browse files
tinglvv and atalman
authored and committed
[CI][CUDA] Move away from cuda12.4, Add cuda12.6 eager CI tests (#148602)
#145570

Breaking #140793 into eager and inductor benchmarks to unblock.

Pull Request resolved: #148602
Approved by: https://github.com/atalman, https://github.com/malfet

Co-authored-by: atalman <atalman@fb.com>
1 parent e2a0296 commit a0bc6d8

File tree

7 files changed: +97 additions, -95 deletions

.ci/docker/build.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ fi
9999
# configuration, so we hardcode everything here rather than do it
100100
# from scratch
101101
case "$image" in
102-
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
103-
CUDA_VERSION=12.4.1
102+
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9)
103+
CUDA_VERSION=12.6.3
104104
CUDNN_VERSION=9
105105
ANACONDA_PYTHON_VERSION=3.10
106106
GCC_VERSION=9
@@ -326,15 +326,15 @@ case "$image" in
326326
EXECUTORCH=yes
327327
;;
328328
pytorch-linux-jammy-py3.12-halide)
329-
CUDA_VERSION=12.4
329+
CUDA_VERSION=12.6
330330
ANACONDA_PYTHON_VERSION=3.12
331331
GCC_VERSION=11
332332
CONDA_CMAKE=yes
333333
HALIDE=yes
334334
TRITON=yes
335335
;;
336336
pytorch-linux-jammy-py3.12-triton-cpu)
337-
CUDA_VERSION=12.4
337+
CUDA_VERSION=12.6
338338
ANACONDA_PYTHON_VERSION=3.12
339339
GCC_VERSION=11
340340
CONDA_CMAKE=yes

.github/workflows/docker-builds.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
matrix:
5050
runner: [linux.12xlarge]
5151
docker-image-name: [
52-
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9,
52+
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9,
5353
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks,
5454
pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks,
5555
pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks,

.github/workflows/periodic.yml

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ jobs:
4949
curr_branch: ${{ github.head_ref || github.ref_name }}
5050
curr_ref_type: ${{ github.ref_type }}
5151

52-
linux-focal-cuda12_4-py3_10-gcc9-build:
53-
name: linux-focal-cuda12.4-py3.10-gcc9
52+
linux-focal-cuda12_6-py3_10-gcc9-build:
53+
name: linux-focal-cuda12.6-py3.10-gcc9
5454
uses: ./.github/workflows/_linux-build.yml
5555
needs: get-label-type
5656
with:
5757
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
58-
build-environment: linux-focal-cuda12.4-py3.10-gcc9
59-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
58+
build-environment: linux-focal-cuda12.6-py3.10-gcc9
59+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
6060
test-matrix: |
6161
{ include: [
6262
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
@@ -67,16 +67,16 @@ jobs:
6767
]}
6868
secrets: inherit
6969

70-
linux-focal-cuda12_4-py3_10-gcc9-test:
71-
name: linux-focal-cuda12.4-py3.10-gcc9
70+
linux-focal-cuda12_6-py3_10-gcc9-test:
71+
name: linux-focal-cuda12.6-py3.10-gcc9
7272
uses: ./.github/workflows/_linux-test.yml
7373
needs:
74-
- linux-focal-cuda12_4-py3_10-gcc9-build
74+
- linux-focal-cuda12_6-py3_10-gcc9-build
7575
- target-determination
7676
with:
77-
build-environment: linux-focal-cuda12.4-py3.10-gcc9
78-
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
79-
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
77+
build-environment: linux-focal-cuda12.6-py3.10-gcc9
78+
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.docker-image }}
79+
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.test-matrix }}
8080
secrets: inherit
8181

8282
linux-focal-cuda11_8-py3_9-gcc9-build:
@@ -170,16 +170,16 @@ jobs:
170170
test-matrix: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.test-matrix }}
171171
secrets: inherit
172172

173-
linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build:
174-
name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
173+
linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build:
174+
name: linux-focal-cuda12.6-py3.10-gcc9-experimental-split-build
175175
uses: ./.github/workflows/_linux-build.yml
176176
needs: get-label-type
177177
if: false # See https://github.com/pytorch/pytorch/issues/138750
178178
with:
179179
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
180180
use_split_build: true
181-
build-environment: linux-focal-cuda12.4-py3.10-gcc9
182-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
181+
build-environment: linux-focal-cuda12.6-py3.10-gcc9
182+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
183183
test-matrix: |
184184
{ include: [
185185
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
@@ -190,16 +190,16 @@ jobs:
190190
]}
191191
secrets: inherit
192192

193-
linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build-test:
194-
name: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
193+
linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build-test:
194+
name: linux-focal-cuda12.6-py3.10-gcc9-experimental-split-build
195195
uses: ./.github/workflows/_linux-test.yml
196196
needs:
197-
- linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build
197+
- linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build
198198
- target-determination
199199
with:
200-
build-environment: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
201-
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
202-
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
200+
build-environment: linux-focal-cuda12.6-py3.10-gcc9-experimental-split-build
201+
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
202+
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
203203
secrets: inherit
204204

205205

@@ -265,14 +265,14 @@ jobs:
265265
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
266266
secrets: inherit
267267

268-
linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build:
269-
name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
268+
linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build:
269+
name: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
270270
uses: ./.github/workflows/_linux-build.yml
271271
needs: get-label-type
272272
with:
273273
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
274-
build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
275-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
274+
build-environment: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
275+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
276276
cuda-arch-list: 8.6
277277
test-matrix: |
278278
{ include: [
@@ -287,28 +287,28 @@ jobs:
287287
]}
288288
secrets: inherit
289289

290-
linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-test:
291-
name: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
290+
linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-test:
291+
name: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
292292
uses: ./.github/workflows/_linux-test.yml
293293
needs:
294-
- linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build
294+
- linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build
295295
- target-determination
296296
with:
297-
build-environment: linux-focal-cuda12.4-py3-gcc9-slow-gradcheck
298-
docker-image: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
299-
test-matrix: ${{ needs.linux-focal-cuda12_4-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
297+
build-environment: linux-focal-cuda12.6-py3-gcc9-slow-gradcheck
298+
docker-image: ${{ needs.linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
299+
test-matrix: ${{ needs.linux-focal-cuda12_6-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
300300
timeout-minutes: 300
301301
secrets: inherit
302302

303-
linux-focal-cuda12_4-py3_10-gcc9-bazel-test:
304-
name: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
303+
linux-focal-cuda12_6-py3_10-gcc9-bazel-test:
304+
name: linux-focal-cuda12.6-py3.10-gcc9-bazel-test
305305
uses: ./.github/workflows/_bazel-build-test.yml
306306
needs: get-label-type
307307
with:
308308
runner: "${{ needs.get-label-type.outputs.label-type }}linux.large"
309-
build-environment: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
310-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
311-
cuda-version: "12.4"
309+
build-environment: linux-focal-cuda12.6-py3.10-gcc9-bazel-test
310+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
311+
cuda-version: "12.6"
312312
test-matrix: |
313313
{ include: [
314314
{ config: "default", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },

.github/workflows/pull.yml

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -278,14 +278,14 @@ jobs:
278278
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-build.outputs.test-matrix }}
279279
secrets: inherit
280280

281-
linux-focal-cuda12_4-py3_10-gcc9-build:
282-
name: linux-focal-cuda12.4-py3.10-gcc9
281+
linux-focal-cuda12_6-py3_10-gcc9-build:
282+
name: linux-focal-cuda12.6-py3.10-gcc9
283283
uses: ./.github/workflows/_linux-build.yml
284284
needs: get-label-type
285285
with:
286286
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
287-
build-environment: linux-focal-cuda12.4-py3.10-gcc9
288-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
287+
build-environment: linux-focal-cuda12.6-py3.10-gcc9
288+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
289289
test-matrix: |
290290
{ include: [
291291
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
@@ -296,17 +296,17 @@ jobs:
296296
]}
297297
secrets: inherit
298298

299-
linux-focal-cuda12_4-py3_10-gcc9-test:
300-
name: linux-focal-cuda12.4-py3.10-gcc9
299+
linux-focal-cuda12_6-py3_10-gcc9-test:
300+
name: linux-focal-cuda12.6-py3.10-gcc9
301301
uses: ./.github/workflows/_linux-test.yml
302302
needs:
303-
- linux-focal-cuda12_4-py3_10-gcc9-build
303+
- linux-focal-cuda12_6-py3_10-gcc9-build
304304
- target-determination
305305
with:
306306
timeout-minutes: 360
307-
build-environment: linux-focal-cuda12.4-py3.10-gcc9
308-
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
309-
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
307+
build-environment: linux-focal-cuda12.6-py3.10-gcc9
308+
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.docker-image }}
309+
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-build.outputs.test-matrix }}
310310
secrets: inherit
311311

312312
linux-jammy-py3-clang12-mobile-build:
@@ -387,8 +387,8 @@ jobs:
387387
needs: get-label-type
388388
with:
389389
runner: "${{ needs.get-label-type.outputs.label-type }}linux.large"
390-
build-environment: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
391-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
390+
build-environment: linux-focal-cuda12.6-py3.10-gcc9-bazel-test
391+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
392392
cuda-version: cpu
393393
test-matrix: |
394394
{ include: [
@@ -430,14 +430,14 @@ jobs:
430430
]}
431431
secrets: inherit
432432

433-
linux-focal-cuda12_4-py3_10-gcc9-sm89-build:
434-
name: linux-focal-cuda12.4-py3.10-gcc9-sm89
433+
linux-focal-cuda12_6-py3_10-gcc9-sm89-build:
434+
name: linux-focal-cuda12.6-py3.10-gcc9-sm89
435435
uses: ./.github/workflows/_linux-build.yml
436436
needs: get-label-type
437437
with:
438438
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
439-
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm89
440-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
439+
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm89
440+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
441441
cuda-arch-list: 8.9
442442
test-matrix: |
443443
{ include: [
@@ -449,16 +449,16 @@ jobs:
449449
]}
450450
secrets: inherit
451451

452-
unstable-linux-focal-cuda12_4-py3_10-gcc9-sm89-build-xfail:
452+
unstable-linux-focal-cuda12_6-py3_10-gcc9-sm89-build-xfail:
453453
# A version of the build that sets a larger number of jobs for a build. May
454454
# OOM
455-
name: unstable-linux-focal-cuda12.4-py3.10-gcc9-sm89-xfail
455+
name: unstable-linux-focal-cuda12.6-py3.10-gcc9-sm89-xfail
456456
uses: ./.github/workflows/_linux-build.yml
457457
needs: get-label-type
458458
with:
459459
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
460-
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm89
461-
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
460+
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm89
461+
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9
462462
cuda-arch-list: 8.9
463463
max-jobs: 4
464464
# Doesn't actually run tests, but need this in order to prevent the build
@@ -469,16 +469,16 @@ jobs:
469469
]}
470470
secrets: inherit
471471

472-
linux-focal-cuda12_4-py3_10-gcc9-sm89-test:
473-
name: linux-focal-cuda12.4-py3.10-gcc9-sm89
472+
linux-focal-cuda12_6-py3_10-gcc9-sm89-test:
473+
name: linux-focal-cuda12.6-py3.10-gcc9-sm89
474474
uses: ./.github/workflows/_linux-test.yml
475475
needs:
476-
- linux-focal-cuda12_4-py3_10-gcc9-sm89-build
476+
- linux-focal-cuda12_6-py3_10-gcc9-sm89-build
477477
- target-determination
478478
with:
479-
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm89
480-
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm89-build.outputs.docker-image }}
481-
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm89-build.outputs.test-matrix }}
479+
build-environment: linux-focal-cuda12.6-py3.10-gcc9-sm89
480+
docker-image: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-sm89-build.outputs.docker-image }}
481+
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-sm89-build.outputs.test-matrix }}
482482
secrets: inherit
483483

484484
linux-jammy-py3-clang12-executorch-build:

.github/workflows/slow.yml

0