pytorch
diff --git a/‎.ci/docker/build.sh
Lines changed: 14 additions & 2 deletions b/‎.ci/docker/build.sh
Lines changed: 14 additions & 2 deletions
diff --git a/‎.ci/docker/ci_commit_pins/executorch.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/executorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/ci_commit_pins/nccl-cu11.txt
Lines changed: 1 addition & 0 deletions b/‎.ci/docker/ci_commit_pins/nccl-cu11.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/docker/ci_commit_pins/nccl-cu12.txt
Lines changed: 1 addition & 0 deletions b/‎.ci/docker/ci_commit_pins/nccl-cu12.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/docker/common/install_base.sh
Lines changed: 4 additions & 0 deletions b/‎.ci/docker/common/install_base.sh
Lines changed: 4 additions & 0 deletions
diff --git a/‎.ci/docker/common/install_cuda.sh
Lines changed: 4 additions & 3 deletions b/‎.ci/docker/common/install_cuda.sh
Lines changed: 4 additions & 3 deletions
diff --git a/‎.ci/docker/common/install_cuda_aarch64.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/common/install_cuda_aarch64.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/common/install_cudnn.sh
Lines changed: 3 additions & 1 deletion b/‎.ci/docker/common/install_cudnn.sh
Lines changed: 3 additions & 1 deletion
diff --git a/‎.ci/docker/common/install_executorch.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/common/install_executorch.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/common/install_onnx.sh
Lines changed: 3 additions & 3 deletions b/‎.ci/docker/common/install_onnx.sh
Lines changed: 3 additions & 3 deletions
diff --git a/‎.ci/docker/common/install_ucc.sh
Lines changed: 25 additions & 1 deletion b/‎.ci/docker/common/install_ucc.sh
Lines changed: 25 additions & 1 deletion
diff --git a/‎.ci/docker/requirements-ci.txt
Lines changed: 2 additions & 2 deletions b/‎.ci/docker/requirements-ci.txt
Lines changed: 2 additions & 2 deletions
diff --git a/‎.ci/docker/ubuntu-rocm/Dockerfile
Lines changed: 42 additions & 12 deletions b/‎.ci/docker/ubuntu-rocm/Dockerfile
Lines changed: 42 additions & 12 deletions
diff --git a/‎.ci/manywheel/build_cuda.sh
Lines changed: 24 additions & 0 deletions b/‎.ci/manywheel/build_cuda.sh
Lines changed: 24 additions & 0 deletions
@@ -86,6 +86,10 @@ CMAKE_VERSION=3.18.5
 
 _UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
 _UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
+if [[ "$image" == *rocm* ]]; then
+  _UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
+  _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
+fi
 
 # It's annoying to rename jobs every time you want to rewrite a
 # configuration, so we hardcode everything here rather than do it
@@ -206,25 +210,33 @@ case "$image" in
     ;;
   pytorch-linux-focal-rocm-n-1-py3)
     ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
+    GCC_VERSION=11
     PROTOBUF=yes
     DB=yes
     VISION=yes
     ROCM_VERSION=6.2.4
     NINJA_VERSION=1.9.0
     CONDA_CMAKE=yes
     TRITON=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    INDUCTOR_BENCHMARKS=yes
     ;;
   pytorch-linux-focal-rocm-n-py3)
     ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
+    GCC_VERSION=11
     PROTOBUF=yes
     DB=yes
     VISION=yes
     ROCM_VERSION=6.3
     NINJA_VERSION=1.9.0
     CONDA_CMAKE=yes
     TRITON=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    INDUCTOR_BENCHMARKS=yes
     ;;
   pytorch-linux-jammy-xpu-2024.0-py3)
     ANACONDA_PYTHON_VERSION=3.9
 
@@ -1 +1 @@
-2f0518d2cfb4ee4353dce4e39590de43fa391399
+5e4d6b6380d575e48e37e9d987fded4ec588e7bc
@@ -0,0 +1 @@
+v2.21.5-1
@@ -0,0 +1 @@
+v2.25.1-1
@@ -32,8 +32,12 @@ install_ubuntu() {
 
   # HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
   # See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
+  # TODO: Eliminate this hack, we should not rely on apt-get installation
+  # See https://github.com/pytorch/pytorch/issues/144768
   if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
     maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
+  elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
+    maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
   else
     maybe_libnccl_dev=""
   fi
 
@@ -2,7 +2,7 @@
 
 set -ex
 
-NCCL_VERSION=v2.21.5-1
+NCCL_VERSION=v2.25.1-1
 CUDNN_VERSION=9.5.1.17
 
 function install_cusparselt_040 {
@@ -40,6 +40,7 @@ function install_cusparselt_063 {
 
 function install_118 {
     CUDNN_VERSION=9.1.0.70
+    NCCL_VERSION=v2.21.5-1
     echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
     rm -rf /usr/local/cuda-11.8 /usr/local/cuda
     # install CUDA 11.8.0 in the same container
@@ -239,7 +240,7 @@ function prune_126 {
 }
 
 function install_128 {
-  CUDNN_VERSION=9.7.0.66
+  CUDNN_VERSION=9.7.1.26
   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
   rm -rf /usr/local/cuda-12.8 /usr/local/cuda
   # install CUDA 12.8.0 in the same container
@@ -288,4 +289,4 @@ do
         ;;
     esac
     shift
-done
+done
@@ -161,7 +161,7 @@ function prune_126 {
 }
 
 function install_128 {
-  CUDNN_VERSION=9.7.0.66
+  CUDNN_VERSION=9.7.1.26
   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
   rm -rf /usr/local/cuda-12.8 /usr/local/cuda
   # install CUDA 12.8.0 in the same container
 
@@ -4,7 +4,9 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn
     pushd tmp_cudnn
-    if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
+    if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
+        CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
+    elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
         CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
     elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
         CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"
 
@@ -37,7 +37,7 @@ install_conda_dependencies() {
 
 install_pip_dependencies() {
   pushd executorch
-  as_jenkins bash install_requirements.sh --pybind xnnpack
+  as_jenkins bash install_executorch.sh
 
   # A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
   # numba and scipy version used in PyTorch CI
 
@@ -31,15 +31,15 @@ pip_install \
 pip_install coloredlogs packaging
 
 pip_install onnxruntime==1.18.1
-pip_install onnx==1.16.2
-pip_install onnxscript==0.1.0.dev20241124 --no-deps
+pip_install onnx==1.17.0
+pip_install onnxscript==0.1.0 --no-deps
 # required by onnxscript
 pip_install ml_dtypes
 
 # Cache the transformers model to be used later by ONNX tests. We need to run the transformers
 # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
 IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
-as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
+as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
 
 # Need a PyTorch version for transformers to work
 pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
 
@@ -8,6 +8,12 @@ else
   with_cuda=no
 fi
 
+if [[ -d "/opt/rocm" ]]; then
+  with_rocm=/opt/rocm
+else
+  with_rocm=no
+fi
+
 function install_ucx() {
   set -ex
   git clone --recursive https://github.com/openucx/ucx.git
@@ -19,6 +25,7 @@ function install_ucx() {
   ./configure --prefix=$UCX_HOME      \
       --enable-mt                     \
       --with-cuda=$with_cuda          \
+      --with-rocm=$with_rocm          \
       --enable-profiling              \
       --enable-stats
   time make -j
@@ -36,12 +43,29 @@ function install_ucc() {
   git submodule update --init --recursive
 
   ./autogen.sh
+
   # We only run distributed tests on Tesla M60 and A10G
   NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
+
+  if [[ -n "$ROCM_VERSION" ]]; then
+    if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
+      amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
+    else
+      amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
+    fi
+    for arch in $amdgpu_targets; do
+      HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch"
+    done
+  else
+    HIP_OFFLOAD="all-arch-no-native"
+  fi
+
   ./configure --prefix=$UCC_HOME          \
     --with-ucx=$UCX_HOME                  \
     --with-cuda=$with_cuda                \
-    --with-nvcc-gencode="${NVCC_GENCODE}"
+    --with-nvcc-gencode="${NVCC_GENCODE}" \
+    --with-rocm=$with_rocm                \
+    --with-rocm-arch="${HIP_OFFLOAD}"
   time make -j
   sudo make install
 
 
@@ -329,7 +329,7 @@ lxml==5.3.0
 
 PyGithub==2.3.0
 
-sympy==1.13.1 ; python_version >= "3.9"
+sympy==1.13.3
 #Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
 #Pinned versions:
 #test that import:
@@ -339,7 +339,7 @@ onnx==1.17.0
 #Pinned versions:
 #test that import:
 
-onnxscript==0.1.0.dev20240817
+onnxscript==0.1.0
 #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
 #Pinned versions:
 #test that import:
 
@@ -14,21 +14,20 @@ ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
 COPY ./common/install_base.sh install_base.sh
 RUN bash ./install_base.sh && rm install_base.sh
 
-# Install clang
-ARG LLVMDEV
-ARG CLANG_VERSION
-COPY ./common/install_clang.sh install_clang.sh
-RUN bash ./install_clang.sh && rm install_clang.sh
-
 # Install user
 COPY ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh
 
+# Install katex
+ARG KATEX
+COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
+RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
+
 # Install conda and other packages (e.g., numpy, pytest)
 ARG ANACONDA_PYTHON_VERSION
-ARG CONDA_CMAKE
 ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
 ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
+ARG CONDA_CMAKE
 COPY requirements-ci.txt /opt/conda/requirements-ci.txt
 COPY ./common/install_conda.sh install_conda.sh
 COPY ./common/common_utils.sh common_utils.sh
@@ -39,6 +38,11 @@ ARG GCC_VERSION
 COPY ./common/install_gcc.sh install_gcc.sh
 RUN bash ./install_gcc.sh && rm install_gcc.sh
 
+# Install clang
+ARG CLANG_VERSION
+COPY ./common/install_clang.sh install_clang.sh
+RUN bash ./install_clang.sh && rm install_clang.sh
+
 # (optional) Install protobuf for ONNX
 ARG PROTOBUF
 COPY ./common/install_protobuf.sh install_protobuf.sh
@@ -85,6 +89,32 @@ COPY ./common/install_amdsmi.sh install_amdsmi.sh
 RUN bash ./install_amdsmi.sh
 RUN rm install_amdsmi.sh
 
+# (optional) Install UCC
+ARG UCX_COMMIT
+ARG UCC_COMMIT
+ENV UCX_COMMIT $UCX_COMMIT
+ENV UCC_COMMIT $UCC_COMMIT
+ENV UCX_HOME /usr
+ENV UCC_HOME /usr
+ADD ./common/install_ucc.sh install_ucc.sh
+RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
+RUN rm install_ucc.sh
+
+COPY ./common/install_openssl.sh install_openssl.sh
+ENV OPENSSL_ROOT_DIR /opt/openssl
+RUN bash ./install_openssl.sh
+ENV OPENSSL_DIR /opt/openssl
+
+ARG INDUCTOR_BENCHMARKS
+ARG ANACONDA_PYTHON_VERSION
+ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
+COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
+COPY ./common/common_utils.sh common_utils.sh
+COPY ci_commit_pins/huggingface.txt huggingface.txt
+COPY ci_commit_pins/timm.txt timm.txt
+RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
+RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
+
 # (optional) Install non-default CMake version
 ARG CMAKE_VERSION
 COPY ./common/install_cmake.sh install_cmake.sh
@@ -107,17 +137,17 @@ COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
 RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
 
-# This is needed by sccache
-COPY ./common/install_openssl.sh install_openssl.sh
-ENV OPENSSL_ROOT_DIR /opt/openssl
-RUN bash ./install_openssl.sh
-ENV OPENSSL_DIR /opt/openssl
 
 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
 RUN bash ./install_cache.sh && rm install_cache.sh
 
+# Install Open MPI for ROCm
+COPY ./common/install_openmpi.sh install_openmpi.sh
+RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
+RUN rm install_openmpi.sh
+
 # Include BUILD_ENVIRONMENT environment variable in image
 ARG BUILD_ENVIRONMENT
 ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 
@@ -14,6 +14,7 @@ export USE_CUDA_STATIC_LINK=1
 export INSTALL_TEST=0 # dont install test binaries into site-packages
 export USE_CUPTI_SO=0
 export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
+export USE_CUFILE=${USE_CUFILE:-1}
 
 # Keep an array of cmake variables to add to
 if [[ -z "$CMAKE_ARGS" ]]; then
@@ -118,6 +119,14 @@ if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then
         )
 fi
 
+
+# Turn USE_CUFILE off for CUDA 11.8, 12.4 since nvidia-cufile-cu11 and 1.9.0.20 are
+# not available in PYPI
+if [[ $CUDA_VERSION == "11.8" || $CUDA_VERSION == "12.4" ]]; then
+    export USE_CUFILE=0
+fi
+
+
 # CUDA_VERSION 12.4, 12.6, 12.8
 if [[ $CUDA_VERSION == 12* ]]; then
     export USE_STATIC_CUDNN=0
@@ -160,6 +169,16 @@ if [[ $CUDA_VERSION == 12* ]]; then
             "libnvrtc.so.12"
             "libnvrtc-builtins.so"
         )
+        if [[ $USE_CUFILE == 1 ]]; then
+            DEPS_LIST+=(
+                "/usr/local/cuda/lib64/libcufile.so.0"
+                "/usr/local/cuda/lib64/libcufile_rdma.so.1"
+            )
+            DEPS_SONAME+=(
+                "libcufile.so.0"
+                "libcufile_rdma.so.1"
+            )
+        fi
     else
         echo "Using nvidia libs from pypi."
         CUDA_RPATHS=(
@@ -176,6 +195,11 @@ if [[ $CUDA_VERSION == 12* ]]; then
             '$ORIGIN/../../nvidia/nccl/lib'
             '$ORIGIN/../../nvidia/nvtx/lib'
         )
+        if [[ $USE_CUFILE == 1 ]]; then
+            CUDA_RPATHS+=(
+                '$ORIGIN/../../nvidia/cufile/lib'
+            )
+        fi
         CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
         export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
         export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-2f0518d2cfb4ee4353dce4e39590de43fa391399`
	`1`	`+5e4d6b6380d575e48e37e9d987fded4ec588e7bc`
Original file line number	Diff line number	Diff line change
`@@ -161,7 +161,7 @@ function prune_126 {`
`161`	`161`	`}`
`162`	`162`
`163`	`163`	`function install_128 {`
`164`		`- CUDNN_VERSION=9.7.0.66`
	`164`	`+ CUDNN_VERSION=9.7.1.26`
`165`	`165`	`echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"`
`166`	`166`	`rm -rf /usr/local/cuda-12.8 /usr/local/cuda`
`167`	`167`	`# install CUDA 12.8.0 in the same container`