Update on "[cudagraphs] Fix issue in collecting static_input_idxs" · pytorch/pytorch@a592e8c · GitHub
[go: up one dir, main page]

Skip to content

Commit a592e8c

Browse files
committed
Update on "[cudagraphs] Fix issue in collecting static_input_idxs"
related to #152275 cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx ipiszy chenyang78 kadeng muchulee8 amjames chauhang aakhundov [ghstack-poisoned]
2 parents f0f3526 + d287a61 commit a592e8c

File tree

234 files changed

+3603
-1214
lines changed

Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ RUN bash ./install_conda.sh && rm install_conda.sh
4141

4242
# Install CUDA
4343
FROM base as cuda
44-
ARG CUDA_VERSION=12.4
44+
ARG CUDA_VERSION=12.6
4545
RUN rm -rf /usr/local/cuda-*
4646
ADD ./common/install_cuda.sh install_cuda.sh
4747
COPY ./common/install_nccl.sh install_nccl.sh
@@ -57,28 +57,23 @@ FROM cuda as cuda11.8
5757
RUN bash ./install_cuda.sh 11.8
5858
ENV DESIRED_CUDA=11.8
5959

60-
FROM cuda as cuda12.1
61-
RUN bash ./install_cuda.sh 12.1
62-
ENV DESIRED_CUDA=12.1
63-
64-
FROM cuda as cuda12.4
65-
RUN bash ./install_cuda.sh 12.4
66-
ENV DESIRED_CUDA=12.4
67-
6860
FROM cuda as cuda12.6
6961
RUN bash ./install_cuda.sh 12.6
7062
ENV DESIRED_CUDA=12.6
7163

64+
FROM cuda as cuda12.8
65+
RUN bash ./install_cuda.sh 12.8
66+
ENV DESIRED_CUDA=12.8
67+
7268
# Install MNIST test data
7369
FROM base as mnist
7470
ADD ./common/install_mnist.sh install_mnist.sh
7571
RUN bash ./install_mnist.sh
7672

7773
FROM base as all_cuda
7874
COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
79-
COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
80-
COPY --from=cuda12.4 /usr/local/cuda-12.4 /usr/local/cuda-12.4
8175
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6
76+
COPY --from=cuda12.8 /usr/local/cuda-12.8 /usr/local/cuda-12.8
8277

8378
# Final step
8479
FROM ${BASE_TARGET} as final
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,6 @@ function install_timm() {
1414
local commit
1515
commit=$(get_pinned_commit timm)
1616

17-
# TODO (huydhn): There is no torchvision release on 3.13 when I write this, so
18-
# I'm using nightly here instead. We just need to package to be able to install
19-
# TIMM. Removing this once vision has a release on 3.13
20-
if [[ "${ANACONDA_PYTHON_VERSION}" == "3.13" ]]; then
21-
pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
22-
fi
23-
2417
pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
2518
# Clean up
2619
conda_run pip uninstall -y cmake torch torchvision triton
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
1212
-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \
1313
-e DESIRED_CUDA=${DESIRED_CUDA} \
1414
-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \
15-
"pytorch/manylinux2_28-builder:cuda${DESIRED_CUDA}-main" \
15+
"pytorch/almalinux-builder:cuda${DESIRED_CUDA}-main" \
1616
magma/build_magma.sh
1717

1818
.PHONY: all
1919
all: magma-cuda128
2020
all: magma-cuda126
21-
all: magma-cuda124
2221
all: magma-cuda118
2322

2423
.PHONY:
@@ -37,11 +36,6 @@ magma-cuda126: DESIRED_CUDA := 12.6
3736
magma-cuda126:
3837
$(DOCKER_RUN)
3938

40-
.PHONY: magma-cuda124
41-
magma-cuda124: DESIRED_CUDA := 12.4
42-
magma-cuda124:
43-
$(DOCKER_RUN)
44-
4539
.PHONY: magma-cuda118
4640
magma-cuda118: DESIRED_CUDA := 11.8
4741
magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_37,code=sm_37
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ac9a39f4b768cef09b9d2be8e074be496d7783b6
1+
14256e6040d9e14698a877924456cdd92bfcd01d
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,6 @@ def generate_wheels_matrix(
394394
"pytorch_extra_install_requirements": (
395395
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"]
396396
if gpu_arch_type == "xpu"
397-
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[CUDA_STABLE]
398-
if os != "linux"
399397
else ""
400398
),
401399
}
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ jobs:
185185
fi
186186
187187
- name: Checkout PyTorch to pytorch dir
188-
uses: actions/checkout@v4
188+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
189189
with:
190190
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
191191
submodules: recursive
@@ -213,7 +213,7 @@ jobs:
213213
- name: configure aws credentials
214214
id: aws_creds
215215
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' && startsWith(github.event.ref, 'refs/tags/ciflow/') }}
216-
uses: aws-actions/configure-aws-credentials@v4
216+
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
217217
with:
218218
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
219219
aws-region: us-east-1
@@ -283,7 +283,7 @@ jobs:
283283
# Ensure the working directory gets chowned back to the current user
284284
docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
285285
286-
- uses: actions/upload-artifact@v4.4.0
286+
- uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
287287
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
288288
with:
289289
name: ${{ inputs.build_name }}
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ jobs:
162162
mkdir "${GITHUB_WORKSPACE}"
163163
164164
- name: Checkout PyTorch to pytorch dir
165-
uses: actions/checkout@v4
165+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
166166
with:
167167
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
168168
submodules: recursive
@@ -189,7 +189,7 @@ jobs:
189189
190190
- name: Download Build Artifacts
191191
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
192-
uses: actions/download-artifact@v4.1.7
192+
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
193193
with:
194194
name: ${{ inputs.build_name }}
195195
path: "${{ runner.temp }}/artifacts/"
@@ -201,7 +201,7 @@ jobs:
201201
- name: configure aws credentials
202202
id: aws_creds
203203
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' && startsWith(github.event.ref, 'refs/tags/ciflow/') }}
204-
uses: aws-actions/configure-aws-credentials@v4
204+
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
205205
with:
206206
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
207207
aws-region: us-east-1
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,14 @@ jobs:
9595

9696
- name: Configure AWS credentials(PyTorch account) for nightly
9797
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/nightly' }}
98-
uses: aws-actions/configure-aws-credentials@v4
98+
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
9999
with:
100100
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
101101
aws-region: us-east-1
102102

103103
- name: Configure AWS credentials(PyTorch account) for RC builds
104104
if: ${{ github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/')) }}
105-
uses: aws-actions/configure-aws-credentials@v4
105+
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
106106
with:
107107
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
108108
aws-region: us-east-1
@@ -112,7 +112,7 @@ jobs:
112112
# NB: When the previous build job is skipped, there won't be any artifacts and
113113
# this step will fail. Binary build jobs can only be skipped on CI, not nightly
114114
continue-on-error: true
115-
uses: actions/download-artifact@v4.1.7
115+
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
116116
with:
117117
name: ${{ inputs.build_name }}
118118
path: "${{ runner.temp }}/artifacts/"
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ jobs:
102102

103103
- name: configure aws credentials
104104
if : ${{ inputs.aws-role-to-assume != '' }}
105-
uses: aws-actions/configure-aws-credentials@v4
105+
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
106106
with:
107107
role-to-assume: ${{ inputs.aws-role-to-assume }}
108108
role-session-name: gha-linux-test
@@ -185,14 +185,14 @@ jobs:
185185

186186
- name: configure aws credentials
187187
if : ${{ inputs.upload-aws-role-to-assume != '' }}
188-
uses: aws-actions/configure-aws-credentials@v4
188+
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
189189
with:
190190
role-to-assume: ${{ inputs.upload-aws-role-to-assume }}
191191
role-session-name: gha-linux-test
192192
aws-region: us-east-1
193193

194194
- name: Upload Python Docs Preview
195-
uses: seemethere/upload-artifact-s3@v5
195+
uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
196196
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'python' && steps.build-docs.outcome == 'success' }}
197197
with:
198198
retention-days: 14
@@ -202,7 +202,7 @@ jobs:
202202
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}
203203

204204
- name: Upload C++ Docs Preview
205-
uses: seemethere/upload-artifact-s3@v5
205+
uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
206206
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'cpp' && steps.build-docs.outcome == 'success' }}
207207
with:
208208
retention-days: 14
@@ -212,7 +212,7 @@ jobs:
212212
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/cppdocs
213213

214214
- name: Upload functorch Docs Preview
215-
uses: seemethere/upload-artifact-s3@v5
215+
uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
216216
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'functorch' && steps.build-docs.outcome == 'success' }}
217217
with:
218218
retention-days: 14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
name: Link Checks
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
runner:
7+
type: string
8+
required: true
9+
ref:
10+
type: string
11+
required: true
12+
13+
jobs:
14+
lint-urls:
15+
name: Lint URLs
16+
if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }}
17+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
18+
with:
19+
timeout: 120
20+
runner: ${{ inputs.runner }}linux.2xlarge
21+
docker-image: pytorch-linux-focal-linter
22+
fetch-depth: 0
23+
submodules: false
24+
ref: ${{ inputs.ref }}
25+
script: |
26+
./scripts/lint_urls.sh $(
27+
[ "${{ github.event_name }}" = "pull_request" ] \
28+
&& git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}
29+
) || {
30+
echo
31+
echo "URL lint failed. If this is a transient outage, you can bypass it by adding the \`skip-url-lint\` label to your PR."
32+
exit 1
33+
}
34+
35+
lint-xrefs:
36+
name: Lint Xrefs
37+
needs: lint-urls
38+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
39+
with:
40+
timeout: 60
41+
runner: ${{ inputs.runner }}linux.2xlarge
42+
docker-image: pytorch-linux-focal-linter
43+
fetch-depth: 0
44+
submodules: false
45+
ref: ${{ inputs.ref }}
46+
script: |
47+
./scripts/lint_xrefs.sh $(
48+
[ "${{ github.event_name }}" = "pull_request" ] \
49+
&& git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}
50+
)