8000 Separate arm64 and amd64 docker builds (#125617) · pytorch/pytorch@af113e7 · GitHub
[go: up one dir, main page]

Skip to content

Commit af113e7

Browse files
atalmanpytorchbot
authored and committed
Separate arm64 and amd64 docker builds (#125617)
Fixes #125094 Please note: the Docker CUDA 12.4 failure is an existing issue, related to the docker image not being available on gitlab: ``` docker.io/nvidia/cuda:12.4.0-cudnn8-devel-ubuntu22.04: docker.io/nvidia/cuda:12.4.0-cudnn8-devel-ubuntu22.04: not found ``` https://github.com/pytorch/pytorch/actions/runs/8974959068/job/24648540236?pr=125617 Here is the reference issue: https://gitlab.com/nvidia/container-images/cuda/-/issues/225 Tracked on our side: pytorch/builder#1811 Pull Request resolved: #125617 Approved by: https://github.com/huydhn, https://github.com/malfet (cherry picked from commit b29d77b)
1 parent 2e165ec commit af113e7

File tree

3 files changed

+47
-6
lines changed

3 files changed

+47
-6
lines changed

.github/scripts/generate_docker_release_matrix.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
2323
ret: List[Dict[str, str]] = []
24+
# CUDA amd64 Docker images are available as both runtime and devel while
25+
# CPU arm64 image is only available as runtime.
2426
for cuda, version in generate_binary_build_matrix.CUDA_ARCHES_FULL_VERSION.items():
2527
for image in DOCKER_IMAGE_TYPES:
2628
ret.append(
@@ -31,9 +33,19 @@ def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
3133
cuda
3234
],
3335
"image_type": image,
34-
"platform": "linux/arm64,linux/amd64",
36+
"platform": "linux/amd64",
3537
}
3638
)
39+
ret.append(
40+
{
41+
"cuda": "cpu",
42+
"cuda_full_version": "",
43+
"cudnn_version": "",
44+
"image_type": "runtime",
45+
"platform": "linux/arm64",
46+
}
47+
)
48+
3749
return {"include": ret}
3850

3951

.github/workflows/docker-release.yml

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ on:
77
- Dockerfile
88
- docker.Makefile
99
- .github/workflows/docker-release.yml
10+
- .github/scripts/generate_docker_release_matrix.py
1011
push:
1112
branches:
1213
- nightly
@@ -129,17 +130,27 @@ jobs:
129130
if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
130131
run: |
131132
PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-cuda${CUDA_VERSION_SHORT}-cudnn${CUDNN_VERSION}-runtime"
133+
CUDA_SUFFIX="-cu${CUDA_VERSION}"
134+
if [[ ${CUDA_VERSION_SHORT} == "cpu" ]]; then
135+
PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
136+
CUDA_SUFFIX=""
137+
fi
132138
133139
PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
134140
python -c 'import torch; print(torch.version.git_version[:7],end="")')
135141
136142
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
137-
ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
138-
docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
143+
ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}"
144+
145+
docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}"
146+
147+
# Please note: here we need to pin a specific version of CUDA to tag with the latest label
148+
if [[ ${CUDA_VERSION_SHORT} == "12.1" ]]; then
149+
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}" \
150+
ghcr.io/pytorch/pytorch-nightly:latest
151+
docker push ghcr.io/pytorch/pytorch-nightly:latest
152+
fi
139153
140-
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}" \
141-
ghcr.io/pytorch/pytorch-nightly:latest
142-
docker push ghcr.io/pytorch/pytorch-nightly:latest
143154
- name: Teardown Linux
144155
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
145156
if: always()

docker.Makefile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,22 @@ devel-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CU
8383
devel-push:
8484
$(DOCKER_PUSH)
8585

86+
ifeq ("$(CUDA_VERSION_SHORT)","cpu")
87+
88+
.PHONY: runtime-image
89+
runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
90+
runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
91+
runtime-image:
92+
$(DOCKER_BUILD)
93+
94+
.PHONY: runtime-push
95+
runtime-push: BASE_IMAGE := $(BASE_RUNTIME)
96+
runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
97+
runtime-push:
98+
$(DOCKER_PUSH)
99+
100+
else
101+
86102
.PHONY: runtime-image
87103
runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
88104
runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime
@@ -95,6 +111,8 @@ runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(
95111
runtime-push:
96112
$(DOCKER_PUSH)
97113

114+
endif
115+
98116
.PHONY: clean
99117
clean:
100118
-docker rmi -f $(shell docker images -q $(DOCKER_FULL_NAME))

0 commit comments

Comments
 (0)
0