|
# Jetson-family devices report compute capability (7, 2) (Xavier) or
# (8, 7) (Orin); Thor is covered separately by the IS_THOR flag.
IS_JETSON = LazyVal(
    lambda: torch.cuda.is_available()
    and (torch.cuda.get_device_capability() in ((7, 2), (8, 7)) or IS_THOR)
)

# True exactly on SM 8.9 (Ada-generation) GPUs.
IS_SM89 = LazyVal(
    lambda: torch.cuda.is_available()
    and torch.cuda.get_device_capability() == (8, 9)
)
def evaluate_gfx_arch_within(arch_list):
    """Report whether the current GPU's gcnArchName matches any arch in *arch_list*.

    The environment variable PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDE,
    when set, takes the place of the detected arch string.  Returns False
    when no CUDA/ROCm device is available.
    """
    if not torch.cuda.is_available():
        return False
    detected = torch.cuda.get_device_properties('cuda').gcnArchName
    effective = os.environ.get('PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDE', detected)
    # gcnArchName can be a decorated string such as "gfx90a:sramecc+:xnack-",
    # so each candidate arch is substring-matched against the effective name
    # instead of being compared for exact equality.
    return any(candidate in effective for candidate in arch_list)
def CDNA2OrLater():
    """Return True when the active ROCm GPU is CDNA2 or newer (gfx90a / gfx942)."""
    cdna2_plus_archs = ["gfx90a", "gfx942"]
    return evaluate_gfx_arch_within(cdna2_plus_archs)
def evaluate_platform_supports_flash_attention():
    """Report whether this platform/GPU combination can run flash attention.

    ROCm: supported on gfx90a / gfx942 / gfx1100; gfx1201 and gfx950 are
    opt-in via TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL.
    CUDA: requires SM80+ and a non-Windows platform.
    """
    if TEST_WITH_ROCM:
        supported = ["gfx90a", "gfx942", "gfx1100"]
        experimental = os.environ.get("TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL", "0")
        if experimental != "0":
            supported.extend(["gfx1201", "gfx950"])
        return evaluate_gfx_arch_within(supported)
    if TEST_CUDA:
        return not IS_WINDOWS and SM80OrLater
    return False
|
65 | 64 | def evaluate_platform_supports_efficient_attention():
|
66 | 65 | if TEST_WITH_ROCM:
|
67 |
| - return evaluate_gfx_arch_exact('gfx90a:sramecc+:xnack-') or evaluate_gfx_arch_exact('gfx942:sramecc+:xnack-') |
| 66 | + arch_list = ["gfx90a", "gfx942", "gfx1100"] |
| 67 | + if os.environ.get("TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL", "0") != "0": |
| 68 | + arch_list += ["gfx1201", "gfx950"] |
| 69 | + return evaluate_gfx_arch_within(arch_list) |
68 | 70 | if TEST_CUDA:
|
69 | 71 | return True
|
70 | 72 | return False
|
|
0 commit comments