pytorch
diff --git a/.ci/docker/common/install_onnx.sh b/.ci/docker/common/install_onnx.sh
Lines changed: 2 additions & 2 deletions
diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
Lines changed: 1 addition & 1 deletion
diff --git a/.github/scripts/close_nonexistent_disable_issues.py b/.github/scripts/close_nonexistent_disable_issues.py
Lines changed: 14 additions & 3 deletions
diff --git a/.github/workflows/close-nonexistent-disable-issues.yml b/.github/workflows/close-nonexistent-disable-issues.yml
Lines changed: 2 additions & 0 deletions
diff --git a/aten/src/ATen/Context.h b/aten/src/ATen/Context.h
Lines changed: 0 additions & 4 deletions
diff --git a/aten/src/ATen/DeviceAccelerator.cpp b/aten/src/ATen/DeviceAccelerator.cpp
Lines changed: 22 additions & 37 deletions
diff --git a/aten/src/ATen/cuda/detail/CUDAHooks.h b/aten/src/ATen/cuda/detail/CUDAHooks.h
Lines changed: 0 additions & 2 deletions
diff --git a/aten/src/ATen/detail/AcceleratorHooksInterface.h b/aten/src/ATen/detail/AcceleratorHooksInterface.h
Lines changed: 0 additions & 17 deletions
diff --git a/aten/src/ATen/miopen/Descriptors.h b/aten/src/ATen/miopen/Descriptors.h
Lines changed: 4 additions & 1 deletion
diff --git a/aten/src/ATen/miopen/miopen-wrapper.h b/aten/src/ATen/miopen/miopen-wrapper.h
Lines changed: 18 additions & 0 deletions
@@ -31,8 +31,8 @@ pip_install \
 pip_install coloredlogs packaging
 
 pip_install onnxruntime==1.18.1
-pip_install onnx==1.16.2
-pip_install onnxscript==0.1.0.dev20241124 --no-deps
+pip_install onnx==1.17.0
+pip_install onnxscript==0.1.0 --no-deps
 # required by onnxscript
 pip_install ml_dtypes
 
 
@@ -339,7 +339,7 @@ onnx==1.17.0
 #Pinned versions:
 #test that import:
 
-onnxscript==0.1.0.dev20240817
+onnxscript==0.1.0
 #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
 #Pinned versions:
 #test that import:
 
@@ -107,16 +107,20 @@ def close_issue(num: int) -> None:
         "Accept": "application/vnd.github.v3+json",
         "Authorization": f"token {os.environ['GITHUB_TOKEN']}",
     }
-    requests.post(
+    response = requests.post(
         f"https://api.github.com/repos/pytorch/pytorch/issues/{num}/comments",
         data=json.dumps({"body": CLOSING_COMMENT}),
         headers=headers,
     )
-    requests.patch(
+    if response.status_code != 201:
+        raise RuntimeError(f"Failed to comment on issue {num}: {response.text}")
+    response = requests.patch(
         f"https://api.github.com/repos/pytorch/pytorch/issues/{num}",
         data=json.dumps({"state": "closed"}),
         headers=headers,
     )
+    if response.status_code != 200:
+        raise RuntimeError(f"Failed to close issue {num}: {response.text}")
 
 
 def check_if_exists(
@@ -190,6 +194,13 @@ def check_if_exists(
     if args.dry_run:
         print("dry run, not actually closing")
     else:
+        failed = False
         for item in to_be_closed:
             _, (num, _, _) = item
-            close_issue(num)
+            try:
+                close_issue(num)
+            except RuntimeError as e:
+                print(e)
+                failed = True
+        if failed:
+            sys.exit(1)
@@ -7,6 +7,8 @@ on:
 jobs:
   close-nonexistent-disable-issues:
     environment: rockset-read-only
+    permissions:
+      issues: write
     if: github.repository_owner == 'pytorch'
     runs-on: ubuntu-latest
     steps:
 
@@ -409,11 +409,7 @@ class TORCH_API Context {
   bool enabled_cudnnSDP = true;
   bool enabled_overrideable = true;
   bool allow_fp16_bf16_reduction_mathSDP = false;
-#ifdef USE_ROCM
-  bool benchmark_cudnn = true;
-#else
   bool benchmark_cudnn = false;
-#endif
   Float32MatmulPrecision float32_matmul_precision =
       c10::utils::check_env("TORCH_ALLOW_TF32_CUBLAS_OVERRIDE") == true
       ? at::Float32MatmulPrecision::HIGH
 
@@ -5,53 +5,38 @@
 namespace at::accelerator {
 
 std::optional<c10::DeviceType> getAccelerator(bool checked) {
-  // 1. Check PrivateUse1 backends
-  // We explicitly allow PrivateUse1 and another device at the same time as we
-  // use this for testing. Whenever a PrivateUse1 device is registered, use it
-  // first.
-  // Note that this check is only for hook registration and thus is NOT initializing
-  // the device or poisoning fork.
-  if (is_privateuse1_backend_registered()) {
-    return kPrivateUse1;
+#define DETECT_AND_ASSIGN_ACCELERATOR(device_name) \
+  if (at::has##device_name()) {                    \
+    device_type = k##device_name;                  \
+    TORCH_CHECK(                                   \
+        !is_accelerator_detected,                  \
+        "Cannot have ",                            \
+        device_type.value(),                       \
+        " with other accelerators.");              \
+    is_accelerator_detected = true;                \
   }
 
-  // 2. Check runtime backends
-  // This state is temporary, these runtime checks should be moved to compile-time
-  // once they provide the new isBuilt API and we are sure they're never in the
-  // same binary as another accelerator.
-#define DETECT_RUNTIME_ACCELERATOR(device_name)     \
-  if (at::has##device_name()) {                     \
-    return k##device_name;                          \
+  if (is_privateuse1_backend_registered()) {
+    // We explicitly allow PrivateUse1 and another device at the same time as we
+    // use this for testing. Whenever a PrivateUse1 device is registered, use it
+    // first.
+    return kPrivateUse1;
   }
-
-  DETECT_RUNTIME_ACCELERATOR(MTIA)
-  DETECT_RUNTIME_ACCELERATOR(HPU)
-
-#undef DETECT_RUNTIME_ACCELERATOR
-
-  // 2. Check compile-time backends
   std::optional<c10::DeviceType> device_type = std::nullopt;
-
-#define DETECT_AND_ASSIGN_ACCELERATOR_COMP(device_name) \
-  if (at::detail::get##device_name##Hooks().isBuilt()) {  \
-    TORCH_CHECK(                                         \
-        !device_type.has_value(),                        \
-        "Cannot have both " #device_name " and ",             \
-        device_type.value(), ".");                       \
-    device_type = k##device_name;                        \
-  }
-
-  DETECT_AND_ASSIGN_ACCELERATOR_COMP(CUDA)
-  DETECT_AND_ASSIGN_ACCELERATOR_COMP(XPU)
-  DETECT_AND_ASSIGN_ACCELERATOR_COMP(HIP)
-  DETECT_AND_ASSIGN_ACCELERATOR_COMP(MPS)
+  bool is_accelerator_detected = false;
+  DETECT_AND_ASSIGN_ACCELERATOR(CUDA)
+  DETECT_AND_ASSIGN_ACCELERATOR(MTIA)
+  DETECT_AND_ASSIGN_ACCELERATOR(XPU)
+  DETECT_AND_ASSIGN_ACCELERATOR(HIP)
+  DETECT_AND_ASSIGN_ACCELERATOR(MPS)
+  DETECT_AND_ASSIGN_ACCELERATOR(HPU)
   if (checked) {
     TORCH_CHECK(
         device_type, "Cannot access accelerator device when none is available.")
   }
   return device_type;
 
-#undef DETECT_AND_ASSIGN_ACCELERATOR_COMP
+#undef DETECT_AND_ASSIGN_ACCELERATOR
 }
 
 bool isAccelerator(c10::DeviceType device_type) {
 
@@ -33,8 +33,6 @@ struct CUDAHooks : public at::CUDAHooksInterface {
   bool hasROCM() const override;
   const at::cuda::NVRTC& nvrtc() const override;
   DeviceIndex current_device() const override;
-  bool isBuilt() const override {return true;}
-  bool isAvailable() const override {return hasCUDA();}
   bool hasPrimaryContext(DeviceIndex device_index) const override;
   Allocator* getCUDADeviceAllocator() const override;
   Allocator* getPinnedMemoryAllocator() const override;
 
@@ -20,23 +20,6 @@ struct TORCH_API AcceleratorHooksInterface {
   // squelch -Werror=non-virtual-dtor
   virtual ~AcceleratorHooksInterface() = default;
 
-  // Whether this backend was enabled at compilation time.
-  // This function should NEVER throw.
-  virtual bool isBuilt() const {
-    return false;
-  }
-
-  // Whether this backend can be used at runtime, meaning it was built,
-  // its runtime dependencies are available (driver) and at least one
-  // supported device can be used.
-  // This function should NEVER throw. This function should NOT initialize the context
-  // on any device (result of hasPrimaryContext below should not change).
-  // While it is acceptable for this function to poison fork, it is
-  // recommended to avoid doing so whenever possible.
-  virtual bool isAvailable() const {
-    return false;
-  }
-
   // Whether the device at device_index is fully initialized or not.
   virtual bool hasPrimaryContext(DeviceIndex device_index) const = 0;
 
 
@@ -111,10 +111,13 @@ struct ConvolutionDescriptor
                       &miopenCreateConvolutionDescriptor,
                       &miopenDestroyConvolutionDescriptor>
 {
-  void set(miopenDataType_t dataType, miopenConvolutionMode_t c_mode,  int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups, bool deterministic) {
+  void set(miopenDataType_t dataType, miopenConvolutionMode_t c_mode,  int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups, bool benchmark, bool deterministic) {
     MIOPEN_CHECK(miopenInitConvolutionNdDescriptor(mut_desc(), dim, pad, stride, upscale, c_mode));
     MIOPEN_CHECK(miopenSetConvolutionGroupCount(mut_desc(), groups));
     MIOPEN_CHECK(miopenSetConvolutionAttribute(mut_desc(), MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC, deterministic ? 1 : 0));
+    if (benchmark) {
+      MIOPEN_CHECK(miopenSetConvolutionFindMode(mut_desc(), miopenConvolutionFindModeNormal));
+    }
   }
 };
 
 
@@ -1,3 +1,21 @@
 #pragma once
 
 #include <miopen/miopen.h>
+#include <miopen/version.h>
+
+#if MIOPEN_VERSION_MAJOR > 3 || (MIOPEN_VERSION_MAJOR == 3 && MIOPEN_VERSION_MINOR >= 4)
+// miopen 3.4 moved find mode from private header to public header
+#else
+// from miopen_internal.h
+extern "C" {
+
+typedef enum
+{
+    miopenConvolutionFindModeNormal        = 1, /*!< Normal mode */
+} miopenConvolutionFindMode_t;
+
+miopenStatus_t miopenSetConvolutionFindMode(
+    miopenConvolutionDescriptor_t convDesc,
+    miopenConvolutionFindMode_t findMode);
+}
+#endif
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -111,10 +111,13 @@ struct ConvolutionDescriptor` |
| `111` | `111` | `&miopenCreateConvolutionDescriptor,` |
| `112` | `112` | `&miopenDestroyConvolutionDescriptor>` |
| `113` | `113` | `{` |
| `114` | | `- void set(miopenDataType_t dataType, miopenConvolutionMode_t c_mode, int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups, bool deterministic) {` |
| | `114` | `+ void set(miopenDataType_t dataType, miopenConvolutionMode_t c_mode, int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups, bool benchmark, bool deterministic) {` |
| `115` | `115` | `MIOPEN_CHECK(miopenInitConvolutionNdDescriptor(mut_desc(), dim, pad, stride, upscale, c_mode));` |
| `116` | `116` | `MIOPEN_CHECK(miopenSetConvolutionGroupCount(mut_desc(), groups));` |
| `117` | `117` | `MIOPEN_CHECK(miopenSetConvolutionAttribute(mut_desc(), MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC, deterministic ? 1 : 0));` |
| | `118` | `+ if (benchmark) {` |
| | `119` | `+ MIOPEN_CHECK(miopenSetConvolutionFindMode(mut_desc(), miopenConvolutionFindModeNormal));` |
| | `120` | `+ }` |
| `118` | `121` | `}` |
| `119` | `122` | `};` |
| `120` | `123` | |