ROCm/pytorch · Commit 1b935e2
[release/2.3] [ROCM] Properly disable Flash Attention/Efficient Attention with environment variables (#1571)
Now `USE_FLASH_ATTENTION=0 USE_MEM_EFF_ATTENTION=0 python setup.py` compiles correctly. This is a cherry-picked version of pytorch#133866.

Co-authored-by: Pruthvi Madugundu <pruthvigithub@gmail.com>
1 parent 772df6b commit 1b935e2
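The mechanism behind that one-liner is ordinary preprocessor gating: when the environment variable is 0, the build never defines the `USE_FLASH_ATTENTION` compile definition, and the C++ side derives a `USE_AOTRITON` macro from it (see the sdp_utils.cpp diff below). Here is a minimal sketch of that pattern; it is illustrative only, not PyTorch source, and `probe_backend` is a hypothetical stand-in for a real capability check such as aotriton's `check_gpu`:

```cpp
// Illustrative sketch of the gating pattern, not PyTorch source.
// USE_FLASH_ATTENTION arrives (or not) as a compile definition, and
// USE_AOTRITON is derived from it the same way sdp_utils.cpp does.
#include <cstdio>

#if defined(USE_FLASH_ATTENTION)
#define USE_AOTRITON 1
#endif

// Hypothetical stand-in for a real capability probe such as
// aotriton::v2::flash::check_gpu.
[[maybe_unused]] static bool probe_backend() { return true; }

bool flash_attention_supported() {
#if USE_AOTRITON
  return probe_backend();  // backend compiled in: ask it
#else
  return false;            // backend compiled out: always unsupported
#endif
}

int main() {
  std::printf("flash attention supported: %d\n",
              flash_attention_supported() ? 1 : 0);
}
```

Building with `g++ -DUSE_FLASH_ATTENTION sketch.cpp` takes the first branch; building without the define reports the backend as unsupported, which is how a `USE_FLASH_ATTENTION=0` build behaves.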

File tree: 3 files changed, 22 insertions(+), 1 deletion(-)

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
```diff
@@ -773,6 +773,16 @@ cmake_dependent_option(
   Will be disabled if not supported by the platform" ON
   "USE_CUDA" OFF)
 
+#
+# Cannot be put into Dependencies.cmake due circular dependency:
+# USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake
+#
+if(USE_ROCM)
+  if(USE_FLASH_ATTENTION)
+    include(cmake/External/aotriton.cmake)
+  endif()
+endif()
+
 if(DEBUG_CUDA)
   string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -lineinfo")
   string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -lineinfo")
```

aten/src/ATen/native/transformers/cuda/sdp_utils.cpp

Lines changed: 12 additions & 0 deletions
```diff
@@ -22,7 +22,10 @@
 #include <functional>
 
 #if USE_ROCM
+#if defined(USE_FLASH_ATTENTION)
 #include <aotriton/flash.h>
+#define USE_AOTRITON 1
+#endif
 #endif
 
 /**
@@ -187,6 +190,7 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug)
   using sm80 = SMVersion<8, 0>;
   using sm90 = SMVersion<9, 0>;
 #if USE_ROCM
+#if USE_AOTRITON
   auto stream = at::cuda::getCurrentCUDAStream().stream();
   if (hipSuccess != aotriton::v2::flash::check_gpu(stream)) {
       auto dprops = at::cuda::getCurrentDeviceProperties();
@@ -196,6 +200,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug)
       }
       return false;
   }
+#else
+  return false;
+#endif
 #else
   auto dprops = at::cuda::getCurrentDeviceProperties();
   if (!check_sm_version<sm80, sm90>(dprops)) {
@@ -217,6 +224,9 @@ bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug)
   // Mem Efficient attention supports hardware in the range [sm_50, sm_90]
   using sm50 = SMVersion<5, 0>;
   using sm90 = SMVersion<9, 0>;
+#if USE_ROCM
+  return false;
+#else
   auto dprops = at::cuda::getCurrentDeviceProperties();
   if (!check_sm_version<sm50, sm90>(dprops)) {
     if (debug) {
@@ -230,6 +240,8 @@ bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug)
     return false;
   }
   return true;
+#endif
+  return false;
 }
 
 bool check_requires_grad_and_head_dim_gt192_constraints_on_sm86_89(
```
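Two details of these hunks are worth spelling out. First, the `#if defined(USE_FLASH_ATTENTION)` guard around the include matters because aotriton is only built at all when the CMakeLists.txt change above pulls in aotriton.cmake, so an unguarded reference to `aotriton::v2::flash::check_gpu` would break a `USE_FLASH_ATTENTION=0` build. Second, on ROCm this branch now reports memory-efficient attention as unsupported unconditionally. A condensed sketch of the resulting control flow, illustrative rather than the real function (the actual code takes `sdp_params`, queries device properties, and warns when `debug` is set):

```cpp
// Condensed, illustrative control flow of check_mem_efficient_hardware_support
// after this patch; not the real PyTorch function.
bool mem_efficient_supported_sketch([[maybe_unused]] bool device_in_sm50_to_sm90) {
#if USE_ROCM
  // ROCm build: reported unsupported before any device query can run.
  return false;
#else
  // CUDA build: supported only for hardware in the [sm_50, sm_90] range.
  if (!device_in_sm50_to_sm90) {
    return false;
  }
  return true;
#endif
  return false;  // unreachable, but keeps every preprocessor path returning
}
```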

cmake/Dependencies.cmake

Lines changed: 0 additions & 1 deletion
```diff
@@ -1348,7 +1348,6 @@ if(USE_ROCM)
     message(STATUS "Disabling Kernel Assert for ROCm")
   endif()
 
-  include(${CMAKE_CURRENT_LIST_DIR}/External/aotriton.cmake)
   if(USE_CUDA)
     caffe2_update_option(USE_MEM_EFF_ATTENTION OFF)
   endif()
```
