8000 [c10d] Remove Option for ProcessGroup and Expose backend Options to r… · pytorch/pytorch@65864d0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 65864d0

Browse files
fduwjj authored and pytorchmergebot committed
[c10d] Remove Option for ProcessGroup and Expose backend Options to reflect the correct code structure (#132931)
We introduced the dispatchable backend for a ProcessGroup and collective in #86225. This PR is a follow-up cleanup to clean up the option of a ProcessGroup and ask users to either set timeout or backend later on or directly create backend after creating a PG. Also PGNCCL is using option class from ProcessGroup but we actually should use Option from backend class. So this PR is to make the type or name to be aligned with what we are doing in cpp side. I don't change the signature for the public API, so they still use args named "pg_options" We need to make changes to the test to make it aligned with the change. Pull Request resolved: #132931 Approved by: https://github.com/H-Huang
1 parent 8b4c487 commit 65864d0

File tree

8 files changed

+113
-117
lines changed

8 files changed

+113
-117
lines changed

test/distributed/test_c10d_common.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1815,6 +1815,7 @@ def test_init_process_group_optional_backend(self):
18151815

18161816
def test_init_process_group_for_all_backends(self):
18171817
for backend in dist.Backend.backend_list:
1818+
excepted_backend = backend
18181819
# skip if the backend is not available on the system
18191820
if backend == dist.Backend.UNDEFINED:
18201821
continue
@@ -1830,6 +1831,11 @@ def test_init_process_group_for_all_backends(self):
18301831
elif backend == dist.Backend.UCC:
18311832
if not dist.is_ucc_available():
18321833
continue
1834+
# Multi-threaded PG is defined as a pure python class.
1835+
# Its pg.name() does not going through Pybind, so its backend name
1836+
# is still "threaded" instead of "custom".
1837+
elif backend != "threaded":
1838+
excepted_backend = "custom"
18331839

18341840
with tempfile.NamedTemporaryFile(delete=False) as f:
18351841
store = dist.FileStore(f.name, self.world_size)
@@ -1842,7 +1848,7 @@ def test_init_process_group_for_all_backends(self):
18421848
pg = c10d._get_default_group()
18431849
self.assertEqual(pg.rank(), self.rank)
18441850
self.assertEqual(pg.size(), self.world_size)
1845-
self.assertEqual(pg.name(), str(backend))
1851+
self.assertEqual(pg.name(), str(excepted_backend))
18461852

18471853
dist.destroy_process_group()
18481854

test/distributed/test_device_mesh.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ def test_set_mesh_dim_group_options(self):
232232

233233
mesh_tensor = torch.arange(4).reshape(2, 2)
234234
mesh = DeviceMesh(device_type, mesh_tensor)
235-
self.assertEqual(mesh.get_group(1)._get_backend_name(), "fake")
235+
# Fake pg only have BackendType as BackendType::CUSTOM.
236+
self.assertEqual(mesh.get_group(1)._get_backend_name(), "custom")
236237

237238

238239
class DeviceMeshTestNDim(DTensorTestBase):

torch/_C/_distributed_c10d.pyi

Lines changed: 3 additions & 12 deletions
F438
Original file line numberDiff line numberDiff line change
@@ -296,15 +296,6 @@ class Backend:
296296
def _set_default_timeout(self, timeout: timedelta) -> None: ...
297297

298298
class ProcessGroup:
299-
class Options:
300-
def __init__(self, backend: str, timeout: timedelta = ...) -> None: ...
301-
@property
302-
def backend(self) -> str: ...
303-
@property
304-
def _timeout(self) -> timedelta: ...
305-
@_timeout.setter
306-
def _timeout(self, val: timedelta) -> None: ...
307-
308299
class BackendType(Enum):
309300
UNDEFINED = ...
310301
GLOO = ...
@@ -318,7 +309,6 @@ class ProcessGroup:
318309
store: Store,
319310
rank: int,
320311
size: int,
321-
options: Options,
322312
) -> None: ...
323313
def rank(self) -> int: ...
324314
def size(self) -> int: ...
@@ -508,6 +498,7 @@ class ProcessGroup:
508498
@property
509499
def _device_types(self) -> list[torch.device]: ...
510500
def _get_backend(self, device: torch.device) -> Backend: ...
501+
def _set_default_backend(self, backend_type: BackendType) -> None: ...
511502
def _register_backend(
512503
self,
513504
device: torch.device,
@@ -532,7 +523,7 @@ class ProcessGroup:
532523
class ProcessGroupGloo(Backend):
533524
class Device: ...
534525

535-
class Options(ProcessGroup.Options):
526+
class Options(Backend.Options):
536527
devices: list[ProcessGroupGloo.Device]
537528
threads: int
538529

@@ -562,7 +553,7 @@ class ProcessGroupNCCL(Backend):
562553
min_ctas: int
563554
max_ctas: int
564555

565-
class Options(ProcessGroup.Options):
556+
class Options(Backend.Options):
566557
config: ProcessGroupNCCL.NCCLConfig
567558
is_high_priority_stream: bool
568559
split_from: ProcessGroupNCCL

torch/csrc/distributed/c10d/ProcessGroup.cpp

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,6 @@
1414

1515
namespace c10d {
1616

17-
static ProcessGroup::BackendType strToBackendType(std::string_view backend) {
18-
if (backend == "undefined") {
19-
return ProcessGroup::BackendType::UNDEFINED;
20-
} else if (backend == "gloo") {
21-
return ProcessGroup::BackendType::GLOO;
22-
} else if (backend == "nccl") {
23-
return ProcessGroup::BackendType::NCCL;
24-
} else if (backend == "ucc") {
25-
return ProcessGroup::BackendType::UCC;
26-
} else if (backend == "mpi") {
27-
return ProcessGroup::BackendType::MPI;
28-
} else {
29-
return ProcessGroup::BackendType::CUSTOM;
30-
}
31-
}
32-
3317
std::string opTypeToString(OpType opType) {
3418
switch (opType) {
3519
case OpType::BROADCAST:
@@ -119,13 +103,11 @@ c10::intrusive_ptr<Backend> ProcessGroup::getBackend(
119103
ProcessGroup::ProcessGroup(
120104
const c10::intrusive_ptr<::c10d::Store>& store,
121105
int rank,
122-
int size,
123-
c10::intrusive_ptr<Options> options)
106+
int size)
124107
: store_(store),
125108
rank_(rank),
126109
size_(size),
127-
options_(std::move(options)),
128-
backendType_(strToBackendType(options_->backend)),
110+
backendType_(BackendType::UNDEFINED),
129111
dist_debug_level_(debug_level()) {
130112
C10_LOG_API_USAGE_ONCE("c10d.process_group");
131113
}

torch/csrc/distributed/c10d/ProcessGroup.hpp

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -45,24 +45,6 @@ namespace c10d {
4545
//
4646
class TORCH_API ProcessGroup : public torch::CustomClassHolder {
4747
public:
48-
// ProcessGroup Options is a base struct that defines the basic options
49-
// when constructing a ProcessGroup. Each ProcessGroup subclass should
50-
// extend this struct and define its options if it wants to provide more
51-
// config options (beyond basic ones defined here) to end user.
52-
struct TORCH_API Options : torch::CustomClassHolder {
53-
explicit Options(
54-
std::string backend,
55-
std::chrono::milliseconds timeout = kProcessGroupDefaultTimeout)
56-
: timeout(timeout), backend(std::move(backend)) {}
57-
~Options() override = default;
58-
59-
std::chrono::milliseconds timeout;
60-
61-
// backend name
62-
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
63-
const std::string backend;
64-
};
65-
6648
enum BackendType : uint8_t {
6749
UNDEFINED = 0,
6850
GLOO = 1,
@@ -72,15 +54,31 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
7254
CUSTOM = 5,
7355
};
7456

57+
static std::string backendTypeToString(BackendType type) {
58+
switch (type) {
59+
case BackendType::GLOO:
60+
return "gloo";
61+
case BackendType::NCCL:
62+
return "nccl";
63+
case BackendType::UCC:
64+
return "ucc";
65+
case BackendType::MPI:
66+
return "mpi";
67+
case BackendType::UNDEFINED:
68+
return "undefined";
69+
default:
70+
return "custom";
71+
}
72+
};
73+
7574
// Not used, set for backwards compatibility and only used for TypeDef in
7675
// Ops.cpp
7776
explicit ProcessGroup(int rank, int size);
7877

7978
explicit ProcessGroup(
8079
const c10::intrusive_ptr<::c10d::Store>& store,
8180
int rank,
82-
int size,
83-
c10::intrusive_ptr<Options> options);
81+
int size);
8482
~ProcessGroup() override;
8583

8684
int getRank() const {
@@ -103,7 +101,7 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
103101
}
104102

105103
virtual const std::string getBackendName() const {
106-
return options_->backend;
104+
return backendTypeToString(backendType_);
107105
};
108106

109107
BackendType getBackendType() const {
@@ -609,10 +607,6 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
609607
opts.timeout.count());
610608
}
611609

612-
c10::intrusive_ptr<Options> getOptions() {
613-
return options_;
614-
}
615-
616610
bool hasBackends() {
617611
return !deviceTypeToBackendType_.empty();
618612
}
@@ -653,6 +647,10 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
653647
return backendTypeToBackend_.at(backendType_);
654648
}
655649

650+
void setDefaultBackend(const BackendType& backendType) {
651+
backendType_ = backendType;
652+
}
653+
656654
c10::intrusive_ptr<Backend> getBackend(c10::DeviceType deviceType);
657655

658656
c10::intrusive_ptr<Backend> getBackend(BackendType backendType) const {
@@ -725,9 +723,7 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
725723
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
726724
const int size_;
727725
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
728-
const c10::intrusive_ptr<Options> options_;
729-
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
730-
const BackendType backendType_;
726+
BackendType backendType_;
731727
std::string pg_desc_;
732728

733729
// Debug level setting. It is parsed once when ProcessGroup is constructed and

torch/csrc/distributed/c10d/init.cpp

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,8 +1814,7 @@ communication mechanism.
18141814
py::init<
18151815
const c10::intrusive_ptr<::c10d::Store>&,
18161816
int,
1817-
int,
1818-
c10::intrusive_ptr<::c10d::ProcessGroup::Options>>(),
1817+
int>(),
18191818
py::call_guard<py::gil_scoped_release>())
18201819
.def("rank", &::c10d::ProcessGroup::getRank)
18211820
.def("size", &::c10d::ProcessGroup::getSize)
@@ -1825,7 +1824,6 @@ communication mechanism.
18251824
"_backend_id",
18261825
&::c10d::ProcessGroup::getBackendID,
18271826
py::arg("backend_type"))
1828-
.def_property_readonly("options", &::c10d::ProcessGroup::getOptions)
18291827
.def(
18301828
"broadcast",
18311829
&::c10d::ProcessGroup::broadcast,
@@ -2135,6 +2133,14 @@ communication mechanism.
21352133
},
21362134
py::arg("device"),
21372135
py::call_guard<py::gil_scoped_release>())
2136+
.def(
2137+
"_set_default_backend",
2138+
[](const c10::intrusive_ptr<::c10d::ProcessGroup>& self,
2139+
const ::c10d::ProcessGroup::BackendType& backendType) {
2140+
return self->setDefaultBackend(backendType);
2141+
},
2142+
py::arg("backend_type"),
2143+
py::call_guard<py::gil_scoped_release>())
21382144
.def(
21392145
"_register_on_completion_hook",
21402146
[](const c10::intrusive_ptr<::c10d::ProcessGroup>& self,
@@ -2237,27 +2243,6 @@ The hook must have the following signature:
22372243
.value("CUSTOM", ::c10d::ProcessGroup::BackendType::CUSTOM)
22382244
.export_values();
22392245

2240-
// base ProcessGroup::Options binding
2241-
auto processGroupOptions =
2242-
intrusive_ptr_class_<::c10d::ProcessGroup::Options>(
2243-
processGroup,
2244-
"Options",
2245-
R"(
2246-
Base class for all processes group options implementations, such as the nccl
2247-
options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
2248-
)")
2249-
.def(
2250-
py::init([](const std::string& backend,
2251-
const std::chrono::milliseconds& timeout) {
2252-
return c10::make_intrusive<::c10d::ProcessGroup::Options>(
2253-
backend, timeout);
2254-
}),
2255-
py::arg("backend"),
2256-
py::arg("timeout") = kProcessGroupDefaultTimeout,
2257-
py::call_guard<py::gil_scoped_release>())
2258-
.def_readonly("backend", &::c10d::ProcessGroup::Options::backend)
2259-
.def_readwrite("_timeout", &::c10d::ProcessGroup::Options::timeout);
2260-
22612246
// TODO: The collection definitions handles direct instantiation of
22622247
// ProcessGroup subclasses (e.g. dist.ProcessGroupGloo). This is not supported
22632248
// and should be removed once all tests are transitioned
@@ -2556,6 +2541,29 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
25562541
&::c10d::Backend::endCoalescing,
25572542
py::call_guard<py::gil_scoped_release>());
25582543

2544+
// base Backend::Options binding
2545+
// TODO: Maybe we can consider how to merge this with
2546+
// `DistributedBackendOptions`.
2547+
auto backendOptions =
2548+
intrusive_ptr_class_<::c10d::Backend::Options>(
2549+
backend,
2550+
"Options",
2551+
R"(
2552+
Base class for all backend options implementations, such as the nccl
2553+
options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
2554+
)")
2555+
.def(
2556+
py::init([](const std::string& backend,
2557+
const std::chrono::milliseconds& timeout) {
2558+
return c10::make_intrusive<::c10d::Backend::Options>(
2559+
backend, timeout);
2560+
}),
2561+
py::arg("backend"),
2562+
py::arg("timeout") = kProcessGroupDefaultTimeout,
2563+
py::call_guard<py::gil_scoped_release>())
2564+
.def_readonly("backend", &::c10d::Backend::Options::backend)
2565+
.def_readwrite("_timeout", &::c10d::Backend::Options::timeout);
2566+
25592567
#ifdef USE_C10D_GLOO
25602568
static const std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME";
25612569

@@ -2567,7 +2575,7 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
25672575
shared_ptr_class_<::gloo::transport::Device>(processGroupGloo, "Device");
25682576

25692577
intrusive_ptr_class_<::c10d::ProcessGroupGloo::Options>(
2570-
processGroupGloo, "_Options", processGroupOptions)
2578+
processGroupGloo, "_Options", backendOptions)
25712579
.def(py::init<>())
25722580
.def_readwrite("_devices", &::c10d::ProcessGroupGloo::Options::devices)
25732581
.def_readwrite("_threads", &::c10d::ProcessGroupGloo::Options::threads);
@@ -2794,7 +2802,7 @@ for details.
27942802
intrusive_ptr_class_<::c10d::ProcessGroupNCCL::Options>(
27952803
processGroupNCCL,
27962804
"Options",
2797-
processGroupOptions,
2805+
backendOptions,
27982806
R"(
27992807
ProcessGroup options for the NCCL backend
28002808

torch/distributed/device_mesh.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def _init_device_mesh_stub():
3636

3737

3838
else:
39+
from torch._C._distributed_c10d import Backend as C10dBackend
3940
from torch.distributed.distributed_c10d import (
4041
_find_pg_by_ranks_and_tag,
4142
_get_default_group,
@@ -66,7 +67,7 @@ def __init__(self) -> None:
6667
self.mesh_stack: List[DeviceMesh] = []
6768
self.child_to_root_mapping: Dict[DeviceMesh, DeviceMesh] = {}
6869
self.mesh_dim_group_options: Dict[
69-
int, Tuple[str, Optional[ProcessGroup.Options]]
70+
int, Tuple[str, Optional[C10dBackend.Options]]
7071
] = {}
7172
self.root_to_flatten_mapping: Dict[DeviceMesh, Dict[str, DeviceMesh]] = {}
7273
# Record flatten mesh name to its mesh dim index in root mesh.
@@ -279,7 +280,7 @@ def _set_mesh_dim_group_options(
279280
self,
280281
dim: int,
281282
backend: str,
282-
pg_options: Optional[ProcessGroup.Options] = None,
283+
pg_options: Optional[C10dBackend.Options] = None,
283284
) -> None:
284285
self.mesh_dim_group_options[dim] = (backend, pg_options)
285286

0 commit comments

Comments
 (0)
0