8000 Merge branch 'main' into justinchu/ghstack/torchlib · pytorch/pytorch@f625b4d · GitHub
[go: up one dir, main page]

Skip to content

Commit f625b4d

Browse files
committed
Merge branch 'main' into justinchu/ghstack/torchlib
2 parents 713e5f3 + dd2a943 commit f625b4d

File tree

2,575 files changed

+32958
-242519
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,575 files changed

+32958
-242519
lines changed

.ci/docker/build.sh

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ CMAKE_VERSION=3.18.5
8686

8787
_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
8888
_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
89+
if [[ "$image" == *rocm* ]]; then
90+
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
91+
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
92+
fi
8993

9094
# It's annoying to rename jobs every time you want to rewrite a
9195
# configuration, so we hardcode everything here rather than do it
@@ -206,25 +210,33 @@ case "$image" in
206210
;;
207211
pytorch-linux-focal-rocm-n-1-py3)
208212
ANACONDA_PYTHON_VERSION=3.10
209-
GCC_VERSION=9
213+
GCC_VERSION=11
210214
PROTOBUF=yes
211215
DB=yes
212216
VISION=yes
213217
ROCM_VERSION=6.2.4
214218
NINJA_VERSION=1.9.0
215219
CONDA_CMAKE=yes
216220
TRITON=yes
221+
KATEX=yes
222+
UCX_COMMIT=${_UCX_COMMIT}
223+
UCC_COMMIT=${_UCC_COMMIT}
224+
INDUCTOR_BENCHMARKS=yes
217225
;;
218226
pytorch-linux-focal-rocm-n-py3)
219227
ANACONDA_PYTHON_VERSION=3.10
220-
GCC_VERSION=9
228+
GCC_VERSION=11
221229
PROTOBUF=yes
222230
DB=yes
223231
VISION=yes
224232
ROCM_VERSION=6.3
225233
NINJA_VERSION=1.9.0
226234
CONDA_CMAKE=yes
227235
TRITON=yes
236+
KATEX=yes
237+
UCX_COMMIT=${_UCX_COMMIT}
238+
UCC_COMMIT=${_UCC_COMMIT}
239+
INDUCTOR_BENCHMARKS=yes
228240
;;
229241
pytorch-linux-jammy-xpu-2024.0-py3)
230242
ANACONDA_PYTHON_VERSION=3.9
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2f0518d2cfb4ee4353dce4e39590de43fa391399
1+
5e4d6b6380d575e48e37e9d987fded4ec588e7bc
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v2.21.5-1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v2.25.1-1

.ci/docker/common/install_base.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ install_ubuntu() {
3232

3333
# HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
3434
# See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
35+
# TODO: Eliminate this hack; we should not rely on apt-get installation
36+
# See https://github.com/pytorch/pytorch/issues/144768
3537
if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
3638
maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
39+
elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
40+
maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
3741
else
3842
maybe_libnccl_dev=""
3943
fi

.ci/docker/common/install_cuda.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
set -ex
44

5-
NCCL_VERSION=v2.21.5-1
5+
NCCL_VERSION=v2.25.1-1
66
CUDNN_VERSION=9.5.1.17
77

88
function install_cusparselt_040 {
@@ -40,6 +40,7 @@ function install_cusparselt_063 {
4040

4141
function install_118 {
4242
CUDNN_VERSION=9.1.0.70
43+
NCCL_VERSION=v2.21.5-1
4344
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
4445
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
4546
# install CUDA 11.8.0 in the same container
@@ -239,7 +240,7 @@ function prune_126 {
239240
}
240241

241242
function install_128 {
242-
CUDNN_VERSION=9.7.0.66
243+
CUDNN_VERSION=9.7.1.26
243244
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
244245
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
245246
# install CUDA 12.8.0 in the same container
@@ -288,4 +289,4 @@ do
288289
;;
289290
esac
290291
shift
291-
done
292+
done

.ci/docker/common/install_cuda_aarch64.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ function prune_126 {
161161
}
162162

163163
function install_128 {
164-
CUDNN_VERSION=9.7.0.66
164+
CUDNN_VERSION=9.7.1.26
165165
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
166166
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
167167
# install CUDA 12.8.0 in the same container

.ci/docker/common/install_cudnn.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
44
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
55
mkdir tmp_cudnn
66
pushd tmp_cudnn
7-
if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
7+
if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
8+
CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
9+
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
810
CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
911
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
1012
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"

.ci/docker/common/install_executorch.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ install_conda_dependencies() {
3737

3838
install_pip_dependencies() {
3939
pushd executorch
40-
as_jenkins bash install_requirements.sh --pybind xnnpack
40+
as_jenkins bash install_executorch.sh
4141

4242
# A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
4343
# numba and scipy version used in PyTorch CI

.ci/docker/common/install_onnx.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@ pip_install \
3131
pip_install coloredlogs packaging
3232

3333
pip_install onnxruntime==1.18.1
34-
pip_install onnx==1.16.2
35-
pip_install onnxscript==0.1.0.dev20241124 --no-deps
34+
pip_install onnx==1.17.0
35+
pip_install onnxscript==0.1.0 --no-deps
3636
# required by onnxscript
3737
pip_install ml_dtypes
3838

3939
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
4040
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
4141
IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
42-
as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
42+
as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
4343

4444
# Need a PyTorch version for transformers to work
4545
pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu

.ci/docker/common/install_ucc.sh

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ else
88
with_cuda=no
99
fi
1010

11+
if [[ -d "/opt/rocm" ]]; then
12+
with_rocm=/opt/rocm
13+
else
14+
with_rocm=no
15+
fi
16+
1117
function install_ucx() {
1218
set -ex
1319
git clone --recursive https://github.com/openucx/ucx.git
@@ -19,6 +25,7 @@ function install_ucx() {
1925
./configure --prefix=$UCX_HOME \
2026
--enable-mt \
2127
--with-cuda=$with_cuda \
28+
--with-rocm=$with_rocm \
2229
--enable-profiling \
2330
--enable-stats
2431
time make -j
@@ -36,12 +43,29 @@ function install_ucc() {
3643
git submodule update --init --recursive
3744

3845
./autogen.sh
46+
3947
# We only run distributed tests on Tesla M60 and A10G
4048
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
49+
50+
if [[ -n "$ROCM_VERSION" ]]; then
51+
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
52+
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
53+
else
54+
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
55+
fi
56+
for arch in $amdgpu_targets; do
57+
HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch"
58+
done
59+
else
60+
HIP_OFFLOAD="all-arch-no-native"
61+
fi
62+
4163
./configure --prefix=$UCC_HOME \
4264
--with-ucx=$UCX_HOME \
4365
--with-cuda=$with_cuda \
44-
--with-nvcc-gencode="${NVCC_GENCODE}"
66+
--with-nvcc-gencode="${NVCC_GENCODE}" \
67+
--with-rocm=$with_rocm \
68+
--with-rocm-arch="${HIP_OFFLOAD}"
4569
time make -j
4670
sudo make install
4771

.ci/docker/requirements-ci.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ lxml==5.3.0
329329

330330
PyGithub==2.3.0
331331

332-
sympy==1.13.1 ; python_version >= "3.9"
332+
sympy==1.13.3
333333
#Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
334334
#Pinned versions:
335335
#test that import:
@@ -339,7 +339,7 @@ onnx==1.17.0
339339
#Pinned versions:
340340
#test that import:
341341

342-
onnxscript==0.1.0.dev20240817
342+
onnxscript==0.1.0
343343
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
344344
#Pinned versions:
345345
#test that import:

.ci/docker/ubuntu-rocm/Dockerfile

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,20 @@ ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
1414
COPY ./common/install_base.sh install_base.sh
1515
RUN bash ./install_base.sh && rm install_base.sh
1616

17-
# Install clang
18-
ARG LLVMDEV
19-
ARG CLANG_VERSION
20-
COPY ./common/install_clang.sh install_clang.sh
21-
RUN bash ./install_clang.sh && rm install_clang.sh
22-
2317
# Install user
2418
COPY ./common/install_user.sh install_user.sh
2519
RUN bash ./install_user.sh && rm install_user.sh
2620

21+
# Install katex
22+
ARG KATEX
23+
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
24+
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
25+
2726
# Install conda and other packages (e.g., numpy, pytest)
2827
ARG ANACONDA_PYTHON_VERSION
29-
ARG CONDA_CMAKE
3028
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
3129
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
30+
ARG CONDA_CMAKE
3231
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
3332
COPY ./common/install_conda.sh install_conda.sh
3433
COPY ./common/common_utils.sh common_utils.sh
@@ -39,6 +38,11 @@ ARG GCC_VERSION
3938
COPY ./common/install_gcc.sh install_gcc.sh
4039
RUN bash ./install_gcc.sh && rm install_gcc.sh
4140

41+
# Install clang
42+
ARG CLANG_VERSION
43+
COPY ./common/install_clang.sh install_clang.sh
44+
RUN bash ./install_clang.sh && rm install_clang.sh
45+
4246
# (optional) Install protobuf for ONNX
4347
ARG PROTOBUF
4448
COPY ./common/install_protobuf.sh install_protobuf.sh
@@ -85,6 +89,32 @@ COPY ./common/install_amdsmi.sh install_amdsmi.sh
8589
RUN bash ./install_amdsmi.sh
8690
RUN rm install_amdsmi.sh
8791

92+
# (optional) Install UCC
93+
ARG UCX_COMMIT
94+
ARG UCC_COMMIT
95+
ENV UCX_COMMIT $UCX_COMMIT
96+
ENV UCC_COMMIT $UCC_COMMIT
97+
ENV UCX_HOME /usr
98+
ENV UCC_HOME /usr
99+
ADD ./common/install_ucc.sh install_ucc.sh
100+
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
101+
RUN rm install_ucc.sh
102+
103+
COPY ./common/install_openssl.sh install_openssl.sh
104+
ENV OPENSSL_ROOT_DIR /opt/openssl
105+
RUN bash ./install_openssl.sh
106+
ENV OPENSSL_DIR /opt/openssl
107+
108+
ARG INDUCTOR_BENCHMARKS
109+
ARG ANACONDA_PYTHON_VERSION
110+
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
111+
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
112+
COPY ./common/common_utils.sh common_utils.sh
113+
COPY ci_commit_pins/huggingface.txt huggingface.txt
114+
COPY ci_commit_pins/timm.txt timm.txt
115+
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
116+
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
117+
88118
# (optional) Install non-default CMake version
89119
ARG CMAKE_VERSION
90120
COPY ./common/install_cmake.sh install_cmake.sh
@@ -107,17 +137,17 @@ COPY triton_version.txt triton_version.txt
107137
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
108138
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
109139

110-
# This is needed by sccache
111-
COPY ./common/install_openssl.sh install_openssl.sh
112-
ENV OPENSSL_ROOT_DIR /opt/openssl
113-
RUN bash ./install_openssl.sh
114-
ENV OPENSSL_DIR /opt/openssl
115140

116141
# Install ccache/sccache (do this last, so we get priority in PATH)
117142
COPY ./common/install_cache.sh install_cache.sh
118143
ENV PATH /opt/cache/bin:$PATH
119144
RUN bash ./install_cache.sh && rm install_cache.sh
120145

146+
# Install Open MPI for ROCm
147+
COPY ./common/install_openmpi.sh install_openmpi.sh
148+
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
149+
RUN rm install_openmpi.sh
150+
121151
# Include BUILD_ENVIRONMENT environment variable in image
122152
ARG BUILD_ENVIRONMENT
123153
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

.ci/manywheel/build_cuda.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export USE_CUDA_STATIC_LINK=1
1414
export INSTALL_TEST=0 # dont install test binaries into site-packages
1515
export USE_CUPTI_SO=0
1616
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
17+
export USE_CUFILE=${USE_CUFILE:-1}
1718

1819
# Keep an array of cmake variables to add to
1920
if [[ -z "$CMAKE_ARGS" ]]; then
@@ -118,6 +119,14 @@ if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then
118119
)
119120
fi
120121

122+
123+
# Turn USE_CUFILE off for CUDA 11.8, 12.4 since nvidia-cufile-cu11 and 1.9.0.20 are
124+
# not available on PyPI
125+
if [[ $CUDA_VERSION == "11.8" || $CUDA_VERSION == "12.4" ]]; then
126+
export USE_CUFILE=0
127+
fi
128+
129+
121130
# CUDA_VERSION 12.4, 12.6, 12.8
122131
if [[ $CUDA_VERSION == 12* ]]; then
123132
export USE_STATIC_CUDNN=0
@@ -160,6 +169,16 @@ if [[ $CUDA_VERSION == 12* ]]; then
160169
"libnvrtc.so.12"
161170
"libnvrtc-builtins.so"
162171
)
172+
if [[ $USE_CUFILE == 1 ]]; then
173+
DEPS_LIST+=(
174+
"/usr/local/cuda/lib64/libcufile.so.0"
175+
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
176+
)
177+
DEPS_SONAME+=(
178+
"libcufile.so.0"
179+
"libcufile_rdma.so.1"
180+
)
181+
fi
163182
else
164183
echo "Using nvidia libs from pypi."
165184
CUDA_RPATHS=(
@@ -176,6 +195,11 @@ if [[ $CUDA_VERSION == 12* ]]; then
176195
'$ORIGIN/../../nvidia/nccl/lib'
177196
'$ORIGIN/../../nvidia/nvtx/lib'
178197
)
198+
if [[ $USE_CUFILE == 1 ]]; then
199+
CUDA_RPATHS+=(
200+
'$ORIGIN/../../nvidia/cufile/lib'
201+
)
202+
fi
179203
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
180204
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
181205
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'

0 commit comments

Comments
 (0)
0