8000 Update · pytorch/pytorch@41c7207 · GitHub
[go: up one dir, main page]

Skip to content

Commit 41c7207

Browse files
committed
Update
[ghstack-poisoned]
2 parents 7fbe418 + 266186b commit 41c7207

File tree

536 files changed

+22909
-6277
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

536 files changed

+22909
-6277
lines changed

.ci/docker/build.sh

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ CMAKE_VERSION=3.18.5
8686

8787
_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
8888
_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
89+
if [[ "$image" == *rocm* ]]; then
90+
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
91+
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
92+
fi
8993

9094
# It's annoying to rename jobs every time you want to rewrite a
9195
# configuration, so we hardcode everything here rather than do it
@@ -206,25 +210,33 @@ case "$image" in
206210
;;
207211
pytorch-linux-focal-rocm-n-1-py3)
208212
ANACONDA_PYTHON_VERSION=3.10
209-
GCC_VERSION=9
213+
GCC_VERSION=11
210214
PROTOBUF=yes
211215
DB=yes
212216
VISION=yes
213217
ROCM_VERSION=6.2.4
214218
NINJA_VERSION=1.9.0
215219
CONDA_CMAKE=yes
216220
TRITON=yes
221+
KATEX=yes
222+
UCX_COMMIT=${_UCX_COMMIT}
223+
UCC_COMMIT=${_UCC_COMMIT}
224+
INDUCTOR_BENCHMARKS=yes
217225
;;
218226
pytorch-linux-focal-rocm-n-py3)
219227
ANACONDA_PYTHON_VERSION=3.10
220-
GCC_VERSION=9
228+
GCC_VERSION=11
221229
PROTOBUF=yes
222230
DB=yes
223231
VISION=yes
224232
ROCM_VERSION=6.3
225233
NINJA_VERSION=1.9.0
226234
CONDA_CMAKE=yes
227235
TRITON=yes
236+
KATEX=yes
237+
UCX_COMMIT=${_UCX_COMMIT}
238+
UCC_COMMIT=${_UCC_COMMIT}
239+
INDUCTOR_BENCHMARKS=yes
228240
;;
229241
pytorch-linux-jammy-xpu-2024.0-py3)
230242
ANACONDA_PYTHON_VERSION=3.9
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
41e7ffa8b7ff09206aa5b9b5c1bbd82b9e0ff277
1+
f936c9992f9730e4996fd6ac16ed6cdc723190af
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v2.21.5-1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v2.25.1-1

.ci/docker/common/install_base.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ install_ubuntu() {
3232

3333
# HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
3434
# See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
35+
# TODO: Eliminate this hack, we should not rely on apt-get installation
36+
# See https://github.com/pytorch/pytorch/issues/144768
3537
if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
3638
maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
39+
elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
40+
maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
3741
else
3842
maybe_libnccl_dev=""
3943
fi

.ci/docker/common/install_cuda.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
set -ex
44

5-
NCCL_VERSION=v2.21.5-1
5+
NCCL_VERSION=v2.25.1-1
66
CUDNN_VERSION=9.5.1.17
77

88
function install_cusparselt_040 {
@@ -40,6 +40,7 @@ function install_cusparselt_063 {
4040

4141
function install_118 {
4242
CUDNN_VERSION=9.1.0.70
43+
NCCL_VERSION=v2.21.5-1
4344
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
4445
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
4546
# install CUDA 11.8.0 in the same container
@@ -239,7 +240,7 @@ function prune_126 {
239240
}
240241

241242
function install_128 {
242-
CUDNN_VERSION=9.7.0.66
243+
CUDNN_VERSION=9.7.1.26
243244
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
244245
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
245246
# install CUDA 12.8.0 in the same container
@@ -288,4 +289,4 @@ do
288289
;;
289290
esac
290291
shift
291-
done
292+
done

.ci/docker/common/install_cuda_aarch64.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ function prune_126 {
161161
}
162162

163163
function install_128 {
164-
CUDNN_VERSION=9.7.0.66
164+
CUDNN_VERSION=9.7.1.26
165165
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
166166
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
167167
# install CUDA 12.8.0 in the same container

.ci/docker/common/install_cudnn.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
44
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
55
mkdir tmp_cudnn
66
pushd tmp_cudnn
7-
if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
7+
if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
8+
CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
9+
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
810
CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
911
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
1012
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"

.ci/docker/common/install_onnx.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ pip_install ml_dtypes
3939
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
4040
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
4141
IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
42-
as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
42+
as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
4343

4444
# Need a PyTorch version for transformers to work
4545
pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu

.ci/docker/common/install_ucc.sh

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ else
88
with_cuda=no
99
fi
1010

11+
if [[ -d "/opt/rocm" ]]; then
12+
with_rocm=/opt/rocm
13+
else
14+
with_rocm=no
15+
fi
16+
1117
function install_ucx() {
1218
set -ex
1319
git clone --recursive https://github.com/openucx/ucx.git
@@ -19,6 +25,7 @@ function install_ucx() {
1925
./configure --prefix=$UCX_HOME \
2026
--enable-mt \
2127
--with-cuda=$with_cuda \
28+
--with-rocm=$with_rocm \
2229
--enable-profiling \
2330
--enable-stats
2431
time make -j
@@ -36,12 +43,29 @@ function install_ucc() {
3643
git submodule update --init --recursive
3744

3845
./autogen.sh
46+
3947
# We only run distributed tests on Tesla M60 and A10G
4048
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
49+
50+
if [[ -n "$ROCM_VERSION" ]]; then
51+
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
52+
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
53+
else
54+
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
55+
fi
56+
for arch in $amdgpu_targets; do
57+
HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch"
58+
done
59+
else
60+
HIP_OFFLOAD="all-arch-no-native"
61+
fi
62+
4163
./configure --prefix=$UCC_HOME \
4264
--with-ucx=$UCX_HOME \
4365
--with-cuda=$with_cuda \
44-
--with-nvcc-gencode="${NVCC_GENCODE}"
66+
--with-nvcc-gencode="${NVCC_GENCODE}" \
67+
--with-rocm=$with_rocm \
68+
--with-rocm-arch="${HIP_OFFLOAD}"
4569
time make -j
4670
sudo make install
4771

0 commit comments

Comments
 (0)
0