Cherry-pick reverts dfe5669 and 1b3f8b7 (#143092) · pytorch/pytorch@aad1c16 · GitHub
[go: up one dir, main page]

Skip to content

Commit aad1c16

Browse files
Cherry-pick reverts dfe5669 and 1b3f8b7 (#143092)
* Revert "[RELAND] Add device-agnostic runtime Device/Stream C++ API (#138677)" This reverts commit 734bb01. Reverted #138677 on behalf of https://github.com/huydhn due to Sorry for reverting your change but the new test is still very flaky on MacOS even when it does not segfault anymore ([comment](#133572 (comment))) * Revert "[RELAND] Add UTs for accelerator device-agnostic runtime APIs (#133572)" This reverts commit 2091194. Reverted #133572 on behalf of https://github.com/huydhn due to Sorry for reverting your change but the new test is still very flaky on MacOS even when it does not segfault anymore ([comment](#133572 (comment))) --------- Co-authored-by: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>
1 parent af92bad commit aad1c16

File tree

7 files changed

+39
-195
lines changed

7 files changed

+39
-195
lines changed

aten/src/ATen/DeviceAccelerator.cpp

Lines changed: 4 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
#include <ATen/Context.h>
22
#include <ATen/DeviceAccelerator.h>
3-
#include <c10/core/impl/VirtualGuardImpl.h>
4-
5-
namespace at::accelerator {
3+
namespace at {
64

75
std::optional<c10::DeviceType> getAccelerator(bool checked) {
86
#define DETECT_AND_ASSIGN_ACCELERATOR(device_name) \
@@ -39,8 +37,8 @@ std::optional<c10::DeviceType> getAccelerator(bool checked) {
3937
#undef DETECT_AND_ASSIGN_ACCELERATOR
4038
}
4139

42-
bool isAccelerator(c10::DeviceType device_type) {
43-
switch (device_type) {
40+
bool isAccelerator(c10::DeviceType d) {
41+
switch (d) {
4442
case at::kCUDA:
4543
case at::kMTIA:
4644
case at::kXPU:
@@ -54,50 +52,4 @@ bool isAccelerator(c10::DeviceType device_type) {
5452
}
5553
}
5654

57-
c10::DeviceIndex deviceCount() {
58-
const auto device_type = getAccelerator(false);
59-
if (!device_type.has_value()) {
60-
return static_cast<c10::DeviceIndex>(0);
61-
}
62-
c10::impl::VirtualGuardImpl impl(device_type.value());
63-
return static_cast<c10::DeviceIndex>(impl.deviceCount());
64-
}
65-
66-
void setDeviceIndex(c10::DeviceIndex device_index) {
67-
const auto device_type = getAccelerator(true).value();
68-
c10::impl::VirtualGuardImpl impl(device_type);
69-
impl.setDevice({device_type, device_index});
70-
}
71-
72-
c10::DeviceIndex getDeviceIndex() {
73-
const auto device_type = getAccelerator(true).value();
74-
c10::impl::VirtualGuardImpl impl(device_type);
75-
return static_cast<c10::DeviceIndex>(impl.getDevice().index());
76-
}
77-
78-
void setCurrentStream(c10::Stream stream) {
79-
const auto device_type = getAccelerator(true).value();
80-
TORCH_CHECK(
81-
device_type == stream.device_type(),
82-
"stream's device type ",
83-
c10::DeviceTypeName(stream.device_type()),
84-
" doesn't match the current accelerator ",
85-
c10::DeviceTypeName(device_type));
86-
c10::impl::VirtualGuardImpl impl(device_type);
87-
impl.exchangeStream(stream);
88-
}
89-
90-
c10::Stream getCurrentStream(c10::DeviceIndex device_index) {
91-
const auto device_type = getAccelerator(true).value();
92-
c10::impl::VirtualGuardImpl impl(device_type);
93-
return impl.getStream({device_type, device_index});
94-
}
95-
96-
void synchronizeDevice(c10::DeviceIndex device_index) {
97-
const auto device_type = getAccelerator(true).value();
98-
c10::impl::VirtualGuardImpl impl(device_type);
99-
// impl.synchronizeDevice should can be safely called from any device
100-
impl.synchronizeDevice(device_index);
101-
}
102-
103-
} // namespace at::accelerator
55+
} // namespace at

aten/src/ATen/DeviceAccelerator.h

Lines changed: 3 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
#include <ATen/detail/MTIAHooksInterface.h>
77
#include <optional>
88

9-
namespace at::accelerator {
10-
119
// This file defines the top level Accelerator concept for PyTorch.
1210
// A device is an accelerator per the definition here if:
1311
// - It is mutually exclusive with all other accelerators
@@ -17,39 +15,13 @@ namespace at::accelerator {
1715
// As of today, accelerator devices are (in no particular order):
1816
// CUDA, MTIA, XPU, HIP, MPS, PrivateUse1
1917

18+
namespace at {
19+
2020
// Ensures that only one accelerator is available (at
2121
// compile time if possible) and return it.
2222
// When checked is true, the returned optional always has a value.
2323
TORCH_API std::optional<c10::DeviceType> getAccelerator(bool checked = false);
2424

25-
// Check if the given device type is an accelerator.
26-
TORCH_API bool isAccelerator(c10::DeviceType device_type);
27-
28-
// Return the number of the device available. Note that this is *REQUIRED* to
29-
// not raise any exception.
30-
TORCH_API c10::DeviceIndex deviceCount();
31-
32-
// Set the current device index to the given device index.
33-
TORCH_API void setDeviceIndex(c10::DeviceIndex device_index);
34-
35-
// Get the current device index.
36-
TORCH_API c10::DeviceIndex getDeviceIndex();
25+
TORCH_API bool isAccelerator(c10::DeviceType d);
3726

38-
// Set the current stream to a given stream. Note that this API doesn't change
39-
// the current device index.
40-
TORCH_API void setCurrentStream(c10::Stream stream);
41-
42-
// Get the current stream of the given device index.
43-
TORCH_API c10::Stream getCurrentStream(c10::DeviceIndex device_index);
44-
45-
// Wait (by blocking the calling thread) until all the work previously enqueued
46-
// on the given device index has been completed.
47-
TORCH_API void synchronizeDevice(c10::DeviceIndex device_index);
48-
49-
} // namespace at::accelerator
50-
51-
namespace at {
52-
// Keep BC only
53-
using at::accelerator::getAccelerator;
54-
using at::accelerator::isAccelerator;
5527
} // namespace at

test/test_accelerator.py

Lines changed: 0 additions & 73 deletions
This file was deleted.

test/test_cuda.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -725,14 +725,6 @@ def test_generic_stream_event(self):
725725
self.assertTrue(issubclass(type(cuda_event), torch.Event))
726726
self.assertTrue(torch.Event in type(cuda_event).mro())
727727

728-
def test_stream_compatibility(self):
729-
s1 = torch.cuda.Stream()
730-
s2 = torch.cuda.Stream()
731-
torch.accelerator.set_stream(s1)
732-
self.assertEqual(torch.accelerator.current_stream().stream_id, s1.stream_id)
733-
torch.accelerator.set_stream(s2)
734-
self.assertEqual(torch.accelerator.current_stream().stream_id, s2.stream_id)
735-
736728
def test_record_stream(self):
737729
cycles_per_ms = get_cycles_per_ms()
738730

test/test_xpu.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -299,14 +299,6 @@ def test_generic_stream_event(self):
299299
self.assertTrue(issubclass(type(xpu_event), torch.Event))
300300
self.assertTrue(torch.Event in type(xpu_event).mro())
301301

302-
def test_stream_compatibility(self):
303-
s1 = torch.xpu.Stream()
304-
s2 = torch.xpu.Stream()
305-
torch.accelerator.set_stream(s1)
306-
self.assertEqual(torch.accelerator.current_stream().stream_id, s1.stream_id)
307-
torch.accelerator.set_stream(s2)
308-
self.assertEqual(torch.accelerator.current_stream().stream_id, s2.stream_id)
309-
310302
def test_generator(self):
311303
torch.manual_seed(2024)
312304
g_state0 = torch.xpu.get_rng_state()

torch/csrc/DeviceAccelerator.cpp

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <c10/core/DeviceGuard.h>
12
#include <torch/csrc/DeviceAccelerator.h>
23
#include <torch/csrc/utils/device_lazy_init.h>
34

@@ -12,52 +13,68 @@ void initModule(PyObject* module) {
1213
});
1314

1415
m.def("_accelerator_deviceCount", []() {
15-
auto device_type = at::accelerator::getAccelerator(false);
16-
torch::utils::maybe_initialize_device(device_type);
17-
return at::accelerator::deviceCount();
16+
const auto device_type = at::getAccelerator(false);
17+
if (!device_type.has_value()) {
18+
return static_cast<c10::DeviceIndex>(0);
19+
}
20+
torch::utils::maybe_initialize_device(device_type.value());
21+
c10::impl::VirtualGuardImpl impl(device_type.value());
22+
return static_cast<c10::DeviceIndex>(impl.deviceCount());
1823
});
1924

2025
m.def("_accelerator_setDeviceIndex", [](c10::DeviceIndex device_index) {
26+
const auto device_type = at::getAccelerator(true).value();
2127
// If device index is negative, no-op
2228
if (device_index < 0) {
2329
return;
2430
}
25-
const auto device_type = at::accelerator::getAccelerator(true).value();
2631
torch::utils::maybe_initialize_device(device_type);
27-
at::accelerator::setDeviceIndex(device_index);
32+
c10::impl::VirtualGuardImpl impl(device_type);
33+
impl.setDevice({device_type, device_index});
2834
});
2935

3036
m.def("_accelerator_getDeviceIndex", []() {
31-
const auto device_type = at::accelerator::getAccelerator(true).value();
37+
const auto device_type = at::getAccelerator(true).value();
3238
torch::utils::maybe_initialize_device(device_type);
33-
return at::accelerator::getDeviceIndex();
39+
c10::impl::VirtualGuardImpl impl(device_type);
40+
return static_cast<c10::DeviceIndex>(impl.getDevice().index());
3441
});
3542

3643
m.def("_accelerator_setStream", [](c10::Stream stream) {
37-
const auto device_type = at::accelerator::getAccelerator(true).value();
44+
const auto device_type = at::getAccelerator(true).value();
45+
TORCH_CHECK(
46+
device_type == stream.device_type(),
47+
"stream's device type ",
48+
c10::DeviceTypeName(stream.device_type()),
49+
" doesn't match the current accelerator ",
50+
c10::DeviceTypeName(device_type));
3851
torch::utils::maybe_initialize_device(device_type);
52+
c10::impl::VirtualGuardImpl impl(device_type);
3953
// Set the current device to the device of stream
40-
if (at::accelerator::getDeviceIndex() != stream.device_index()) {
41-
at::accelerator::setDeviceIndex(stream.device_index());
54+
if (impl.getDevice().index() != stream.device_index()) {
55+
impl.setDevice(stream.device());
4256
}
43-
at::accelerator::setCurrentStream(stream);
57+
impl.exchangeStream(stream);
4458
});
4559

4660
m.def("_accelerator_getStream", [](c10::DeviceIndex device_index) {
47-
const auto device_type = at::accelerator::getAccelerator(true).value();
61+
const auto device_type = at::getAccelerator(true).value();
4862
torch::utils::maybe_initialize_device(device_type);
49-
return at::accelerator::getCurrentStream(device_index);
63+
c10::impl::VirtualGuardImpl impl(device_type);
64+
return impl.getStream({device_type, device_index});
5065
});
5166

5267
m.def("_accelerator_synchronizeDevice", [](c10::DeviceIndex device_index) {
53-
const auto device_type = at::accelerator::getAccelerator(true).value();
68+
const auto device_type = at::getAccelerator(true).value();
5469
if (!torch::utils::is_device_initialized(device_type)) {
5570
return;
5671
}
5772
torch::utils::maybe_initialize_device(device_type);
73+
c10::impl::VirtualGuardImpl impl(device_type);
74+
// impl.synchronizeDevice should can be safely called from any device
5875
{
5976
py::gil_scoped_release no_gil;
60-
at::accelerator::synchronizeDevice(device_index);
77+
impl.synchronizeDevice(device_index);
6178
}
6279
});
6380
}

torch/csrc/utils/device_lazy_init.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,6 @@ inline void maybe_initialize_device(const at::TensorOptions& options) {
4646
maybe_initialize_device(device);
4747
}
4848

49-
inline void maybe_initialize_device(
50-
std::optional<at::DeviceType>& device_type) {
51-
if (!device_type.has_value()) {
52-
return;
53-
}
54-
maybe_initialize_device(device_type.value());
55-
}
56-
5749
bool is_device_initialized(at::DeviceType device_type);
5850

5951
} // namespace torch::utils

0 commit comments

Comments
 (0)
0