Generalize torch._C._set_allocator_settings to be generic (#156175) · pytorch/pytorch@d3ce450 · GitHub

Commit d3ce450

guangyey authored and pytorchmergebot committed
Generalize torch._C._set_allocator_settings to be generic (#156175)
# Motivation
This PR moves the implementation of `torch.cuda.memory._set_allocator_settings` to `torch._C._accelerator_setAllocatorSettings`. Since the original API was intended as a temporary/internal utility, I am not exposing the new function as a public API.

Pull Request resolved: #156175
Approved by: https://github.com/albanD
ghstack dependencies: #149601, #157908, #150312, #156165
1 parent 1fc010a commit d3ce450
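As a quick illustration of the rename, here is a minimal sketch of the new entry point next to the old one; the settings string is only an illustrative value, both functions are private APIs, and this assumes a build that includes this change:

```python
import torch

# New device-generic entry point introduced by this PR (still a private API).
# The settings string follows the caching-allocator config syntax; the value
# here is only an illustrative example.
torch._C._accelerator_setAllocatorSettings("max_split_size_mb:128")

# The old helper is kept as a backward-compatibility alias, so existing
# callers keep working unchanged.
torch.cuda.memory._set_allocator_settings("max_split_size_mb:128")
```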

File tree

9 files changed: +26 −34 lines changed

c10/core/AllocatorConfig.cpp

Lines changed: 7 additions & 7 deletions
```diff
@@ -45,7 +45,7 @@ size_t AcceleratorAllocatorConfig::roundup_power2_divisions(size_t size) {
       63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoStart);
   const size_t interval_end =
       63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoEnd);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       interval_end - interval_start == kRoundUpPowerOfTwoIntervals,
       "kRoundUpPowerOfTwoIntervals mismatch");

@@ -64,7 +64,7 @@ size_t AcceleratorAllocatorConfig::parseMaxSplitSize(
       std::numeric_limits<size_t>::max() / kMB;

   size_t val_env = tokenizer.toSizeT(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val_env >= min_allowed_split_size_mb,
       "CachingAllocator option max_split_size_mb too small, must be >= ",
       min_allowed_split_size_mb);
@@ -83,7 +83,7 @@ size_t AcceleratorAllocatorConfig::parseMaxNonSplitRoundingSize(
       std::numeric_limits<size_t>::max() / kMB;

   size_t val_env = tokenizer.toSizeT(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val_env >= min_allowed_split_size_mb,
       "CachingAllocator option max_non_split_rounding_mb too small, must be >= ",
       min_allowed_split_size_mb);
@@ -98,7 +98,7 @@ size_t AcceleratorAllocatorConfig::parseGarbageCollectionThreshold(
     size_t i) {
   tokenizer.checkToken(++i, ":");
   double val_env = tokenizer.toDouble(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val_env > 0 && val_env < 1.0,
       "garbage_collect_threshold is invalid, set it in (0.0, 1.0)");
   garbage_collection_threshold_ = val_env;
@@ -119,7 +119,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
   size_t value_index = i;
   tokenizer.checkToken(++i, ":");
   size_t value = tokenizer.toSizeT(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       value == 0 || llvm::isPowerOf2_64(value),
       "For roundups, the divisions has to be power of 2 or 0 to disable roundup ");

@@ -133,7 +133,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
       value);
   } else {
     size_t boundary = tokenizer.toSizeT(value_index);
-    TORCH_CHECK(
+    TORCH_CHECK_VALUE(
         llvm::isPowerOf2_64(boundary),
         "For roundups, the intervals have to be power of 2 ");

@@ -163,7 +163,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
       "Expected closing bracket ']' in ConfigTokenizer but reached end of config");
   } else { // Keep this for backwards compatibility
     size_t value = tokenizer.toSizeT(i);
-    TORCH_CHECK(
+    TORCH_CHECK_VALUE(
         llvm::isPowerOf2_64(value),
         "For roundups, the divisions has to be power of 2 ");
     std::fill(
```
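The practical effect of switching `TORCH_CHECK` to `TORCH_CHECK_VALUE` is the Python-visible exception type: invalid option values now surface as `ValueError` instead of the generic `RuntimeError` (see the updated assertions in test/test_cuda.py below). A minimal sketch, assuming a build with this change:

```python
import torch

try:
    # garbage_collection_threshold must lie strictly in (0.0, 1.0), so 1.2
    # trips the TORCH_CHECK_VALUE added above and surfaces as ValueError.
    torch.cuda.memory._set_allocator_settings("garbage_collection_threshold:1.2")
except ValueError as e:
    print(f"rejected: {e}")
```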

c10/core/AllocatorConfig.h

Lines changed: 1 addition & 1 deletion
```diff
@@ -76,7 +76,7 @@ class ConfigTokenizer {
     } else if (token == "False") {
       return false;
     } else {
-      TORCH_CHECK(
+      TORCH_CHECK_VALUE(
           false,
           "Expected 'True' or 'False' at index ",
           i,
```
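As the boolean-parsing branch shown above makes clear, boolean options accept only the literal tokens `True` and `False`; any other token now fails with `ValueError`. A minimal sketch, using an option name taken from the tests below:

```python
import torch

# Boolean allocator options must be spelled exactly "True" or "False".
torch.cuda.memory._set_allocator_settings("pinned_use_cuda_host_register:False")

try:
    # "none" is not a valid boolean token, so parsing fails with ValueError.
    torch.cuda.memory._set_allocator_settings("pinned_use_cuda_host_register:none")
except ValueError as e:
    print(f"invalid boolean token: {e}")
```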

c10/cuda/CUDAAllocatorConfig.cpp

Lines changed: 3 additions & 3 deletions
```diff
@@ -22,7 +22,7 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig(
 #define PYTORCH_TOKEN2 "hipMallocAsync"
   tokenizer.checkToken(++i, ":");
   i++; // Move to the value after the colon
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       ((tokenizer[i] == "native") || (tokenizer[i] == PYTORCH_TOKEN1) ||
        (tokenizer[i] == PYTORCH_TOKEN2)),
       "Unknown allocator backend, "
@@ -134,12 +134,12 @@ size_t CUDAAllocatorConfig::parsePinnedNumRegisterThreads(
     size_t i) {
   tokenizer.checkToken(++i, ":");
   size_t val2 = tokenizer.toSizeT(++i);
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       llvm::isPowerOf2_64(val2),
       "Number of register threads has to be power of 2 ",
       "");
   auto maxThreads = CUDAAllocatorConfig::pinned_max_register_threads();
-  TORCH_CHECK(
+  TORCH_CHECK_VALUE(
       val2 <= maxThreads,
       "Number of register threads should be less than or equal to " +
           std::to_string(maxThreads),
```
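Both pinned-memory checks are observable from Python: the register-thread count must be a power of two and must not exceed `pinned_max_register_threads()`. A minimal sketch, with values chosen to exercise each check (1024 exceeds the maximum, per the test update below):

```python
import torch

# Valid: 8 is a power of two and within the allowed maximum.
torch.cuda.memory._set_allocator_settings("pinned_num_register_threads:8")

try:
    # 1024 is a power of two but exceeds pinned_max_register_threads(),
    # so the second TORCH_CHECK_VALUE fires as ValueError.
    torch.cuda.memory._set_allocator_settings("pinned_num_register_threads:1024")
except ValueError as e:
    print(f"too many register threads: {e}")
```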

test/test_cuda.py

Lines changed: 6 additions & 6 deletions
```diff
@@ -4471,28 +4471,28 @@ def power2_div(size, div_factor):
         with self.assertRaises(RuntimeError):
             torch.cuda.memory._set_allocator_settings("foo:1,bar:2")

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings(
                 "garbage_collection_threshold:1.2"
             )

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings("max_split_size_mb:2")

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings("release_lock_on_cudamalloc:none")

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings(
                 "pinned_use_cuda_host_register:none"
             )

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings(
                 "pinned_num_register_threads:none"
             )

-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(ValueError):
             torch.cuda.memory._set_allocator_settings(
                 "pinned_num_register_threads:1024"
             )
```
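Note the distinction the updated test encodes: an unrecognized option name (the first, unchanged assertion) still raises `RuntimeError`, while malformed or out-of-range values for known options now raise `ValueError`. A minimal sketch of that split:

```python
import torch

# Unknown option names still surface as RuntimeError (unchanged behavior).
try:
    torch.cuda.memory._set_allocator_settings("foo:1,bar:2")
except RuntimeError:
    print("unknown option name -> RuntimeError")

# A known option with an invalid value now surfaces as ValueError.
try:
    torch.cuda.memory._set_allocator_settings("max_split_size_mb:2")
except ValueError:
    print("bad value for known option -> ValueError")
```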

torch/_C/__init__.pyi.in

Lines changed: 1 addition & 1 deletion
```diff
@@ -2017,7 +2017,6 @@ def _cuda_cudaHostAllocator() -> _int: ...
 def _cuda_cudaCachingAllocator_raw_alloc(size: _int, cuda_stream: _int) -> _int: ...
 def _cuda_cudaCachingAllocator_raw_delete(ptr: _int) -> None: ...
 def _cuda_cudaCachingAllocator_enable(val: _bool) -> None: ...
-def _cuda_cudaCachingAllocator_set_allocator_settings(env: str) -> None: ...
 def _cuda_beginAllocateToPool(device: _int, mempool_id: tuple[_int, _int]) -> None: ...
 def _cuda_beginAllocateCurrentThreadToPool(
     device: _int,
@@ -2435,6 +2434,7 @@ def _accelerator_getStream(device_index: _int) -> Stream: ...
 def _accelerator_synchronizeDevice(device_index: _int) -> None: ...
 def _accelerator_exchangeDevice(device_index: _int) -> _int: ...
 def _accelerator_maybeExchangeDevice(device_index: _int) -> _int: ...
+def _accelerator_setAllocatorSettings(env: str) -> None: ...

 # Defined in torch/csrc/jit/python/python_tracer.cpp
 class TracingState:
```

torch/_dynamo/trace_rules.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -447,6 +447,7 @@
     "torch._C._accelerator_getAccelerator",
     "torch._C._accelerator_getDeviceIndex",
     "torch._C._accelerator_getStream",
+    "torch._C._accelerator_setAllocatorSettings",
     "torch._C._accelerator_setStream",
     "torch._C._accelerator_synchronizeDevice",
     "torch._C._activate_gpu_trace",
@@ -503,7 +504,6 @@
     "torch._C._cuda_clearCublasWorkspaces",
     "torch._C._cuda_cudaCachingAllocator_raw_alloc",
     "torch._C._cuda_cudaCachingAllocator_raw_delete",
-    "torch._C._cuda_cudaCachingAllocator_set_allocator_settings",
     "torch._C._cuda_cudaHostAllocator",
     "torch._C._cuda_customAllocator",
     "torch._C._cuda_emptyCache",
```

torch/csrc/DeviceAccelerator.cpp

Lines changed: 5 additions & 0 deletions
```diff
@@ -1,3 +1,4 @@
+#include <c10/core/AllocatorConfig.h>
 #include <torch/csrc/DeviceAccelerator.h>
 #include <torch/csrc/utils/device_lazy_init.h>

@@ -72,6 +73,10 @@ void initModule(PyObject* module) {
     torch::utils::maybe_initialize_device(device_type);
     return at::accelerator::maybeExchangeDevice(device_index);
   });
+
+  m.def("_accelerator_setAllocatorSettings", [](std::string env) {
+    c10::CachingAllocator::setAllocatorSettings(env);
+  });
 }

 } // namespace torch::accelerator
```
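Because the binding now registers in the generic accelerator module and calls `c10::CachingAllocator::setAllocatorSettings`, the same entry point serves whichever accelerator backend is active rather than being CUDA-only. A minimal sketch, assuming a build with this change and an available accelerator:

```python
import torch

# Works for the active accelerator backend, since the underlying
# c10::CachingAllocator::setAllocatorSettings is device-agnostic.
if torch.accelerator.is_available():
    torch._C._accelerator_setAllocatorSettings("garbage_collection_threshold:0.8")
```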

torch/csrc/cuda/Module.cpp

Lines changed: 0 additions & 13 deletions
```diff
@@ -422,15 +422,6 @@ PyObject* THCPModule_cudaCachingAllocator_enable(
   END_HANDLE_TH_ERRORS
 }

-PyObject* THCPModule_cudaCachingAllocator_set_allocator_settings(
-    PyObject* _unused,
-    PyObject* env) {
-  HANDLE_TH_ERRORS
-  c10::CachingAllocator::setAllocatorSettings(THPUtils_unpackString(env));
-  Py_RETURN_NONE;
-  END_HANDLE_TH_ERRORS
-}
-
 PyObject* THCPModule_getAllocatorBackend(PyObject* _unused, PyObject* noargs) {
   HANDLE_TH_ERRORS
   return THPUtils_packString(c10::cuda::CUDACachingAllocator::name());
@@ -2052,10 +2043,6 @@ static struct PyMethodDef _THCPModule_methods[] = {
      THCPModule_cudaCachingAllocator_enable,
      METH_O,
      nullptr},
-    {"_cuda_cudaCachingAllocator_set_allocator_settings",
-     THCPModule_cudaCachingAllocator_set_allocator_settings,
-     METH_O,
-     nullptr},
     {"_cuda_getAllocatorBackend",
      THCPModule_getAllocatorBackend,
      METH_NOARGS,
```

torch/cuda/memory.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -1075,8 +1075,8 @@ def _save_memory_usage(filename="output.svg", snapshot=None):
         f.write(_memory(snapshot))


-def _set_allocator_settings(env: str):
-    return torch._C._cuda_cudaCachingAllocator_set_allocator_settings(env)
+# Keep for BC only
+_set_allocator_settings = torch._C._accelerator_setAllocatorSettings


 def get_allocator_backend() -> str:
```
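With this change, the backward-compatibility alias and the generic binding are literally the same function object; a quick sanity check (a minimal sketch):

```python
import torch

# The alias is a direct reference, not a wrapper, so identity holds.
assert (
    torch.cuda.memory._set_allocator_settings
    is torch._C._accelerator_setAllocatorSettings
)
```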

0 commit comments