[c10d] Remove Option for ProcessGroup and Expose backend Options to reflect the correct code structure · Pull Request #132931 · pytorch/pytorch

Closed · wants to merge 10 commits

Changes from 1 commit
Update on "[c10d] Remove Option for ProcessGroup and Expose backend O…
…ptions to reflect the correct code structure"


We introduced the dispatchable backend for a ProcessGroup and its collectives in #86225. This PR is a follow-up cleanup that removes the Options of a ProcessGroup and asks users either to set the timeout or backend later on, or to create the backend directly after creating a PG.

Also, ProcessGroupNCCL has been using the Options class from ProcessGroup, but it should actually use the Options from the Backend class. This PR aligns the type and name with what we do on the C++ side. The signature of the public API is unchanged, so it still takes an argument named "pg_options".
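A minimal sketch of the resulting user-facing pattern (an illustration, not code from this PR; it assumes a CUDA build with NCCL available and env:// rendezvous variables such as MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE already set):

from datetime import timedelta

import torch.distributed as dist
from torch.distributed import ProcessGroupNCCL

# Options now come from the backend class (ProcessGroupNCCL.Options),
# not from ProcessGroup; the public argument keeps its old name.
opts = ProcessGroupNCCL.Options()
opts.is_high_priority_stream = False   # NCCL-specific knob
opts._timeout = timedelta(minutes=5)   # timeout lives on the backend options
dist.init_process_group("nccl", pg_options=opts)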


cc XilunWu H-Huang awgu kwen2501 wanchaol fegin wz337 wconstab d4l3k c-p-i-o ezyang gchanan

[ghstack-poisoned]
fduwjj committed Aug 28, 2024
commit 144800c73f23b31e956b7a36a258693099dfee6e
1 change: 1 addition & 0 deletions test/allowlist_for_publicAPI.json
@@ -63,6 +63,7 @@
"BroadcastOptions",
"BuiltinCommHookType",
"Callable",
"C10dBackend",
"DebugLevel",
"Dict",
"Enum",
9 changes: 8 additions & 1 deletion test/distributed/test_c10d_common.py
@@ -1815,6 +1815,7 @@ def test_init_process_group_optional_backend(self):

def test_init_process_group_for_all_backends(self):
for backend in dist.Backend.backend_list:
expected_backend = backend
# skip if the backend is not available on the system
if backend == dist.Backend.UNDEFINED:
continue
@@ -1830,6 +1831,11 @@ def test_init_process_group_for_all_backends(self):
elif backend == dist.Backend.UCC:
if not dist.is_ucc_available():
continue
# Multi-threaded PG is defined as a pure Python class.
# Its pg.name() does not go through pybind, so its backend name
# is still "threaded" instead of "custom".
elif backend != "threaded":
expected_backend = "custom"

with tempfile.NamedTemporaryFile(delete=False) as f:
store = dist.FileStore(f.name, self.world_size)
@@ -1842,7 +1848,8 @@ def test_init_process_group_for_all_backends(self):
pg = c10d._get_default_group()
self.assertEqual(pg.rank(), self.rank)
self.assertEqual(pg.size(), self.world_size)
self.assertEqual(pg.name(), str(backend))
self.assertEqual(pg.name(), str(expected_backend))

dist.destroy_process_group()

3 changes: 2 additions & 1 deletion test/distributed/test_device_mesh.py
@@ -232,7 +232,8 @@ def test_set_mesh_dim_group_options(self):

mesh_tensor = torch.arange(4).reshape(2, 2)
mesh = DeviceMesh(device_type, mesh_tensor)
self.assertEqual(mesh.get_group(1)._get_backend_name(), "fake")
# The fake PG only has BackendType::CUSTOM as its backend type.
self.assertEqual(mesh.get_group(1)._get_backend_name(), "custom")


class DeviceMeshTestNDim(DTensorTestBase):
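A runnable single-rank sketch of what these two tests now pin down (assuming only the built-in gloo backend, since the "threaded" and "fake" setups require test-internal registration):

import tempfile

import torch.distributed as dist
from torch.distributed import distributed_c10d as c10d

with tempfile.NamedTemporaryFile(delete=False) as f:
    store = dist.FileStore(f.name, 1)
    dist.init_process_group("gloo", store=store, rank=0, world_size=1)
    pg = c10d._get_default_group()
    # Built-in backends keep their names; a backend registered via
    # dist.Backend.register_backend would now report "custom" instead
    # of its registered string.
    assert pg.name() == "gloo"
    dist.destroy_process_group()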
1 change: 1 addition & 0 deletions torch/_C/_distributed_c10d.pyi
@@ -498,6 +498,7 @@ class ProcessGroup:
@property
def _device_types(self) -> list[torch.device]: ...
def _get_backend(self, device: torch.device) -> Backend: ...
def _set_default_backend(self, backend_type: BackendType) -> None: ...
def _register_backend(
self,
device: torch.device,
9 changes: 7 additions & 2 deletions torch/csrc/distributed/c10d/ProcessGroup.hpp
@@ -65,8 +65,9 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
case BackendType::MPI:
return "mpi";
case BackendType::UNDEFINED:
default:
return "undefined";
default:
return "custom";
}
};

@@ -646,6 +647,10 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
return backendTypeToBackend_.at(backendType_);
}

void setDefaultBackend(const BackendType& backendType) {
backendType_ = backendType;
}

c10::intrusive_ptr<Backend> getBackend(c10::DeviceType deviceType);

c10::intrusive_ptr<Backend> getBackend(BackendType backendType) const {
@@ -718,7 +723,7 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
const int size_;
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
const BackendType backendType_;
BackendType backendType_;
std::string pg_desc_;

// Debug level setting. It is parsed once when ProcessGroup is constructed and
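For reference, a Python mirror of the C++ backendTypeToString mapping above (a sketch; the gloo/nccl/ucc cases sit outside the visible hunk and are inferred from the surrounding switch):

from torch.distributed import ProcessGroup

_BT = ProcessGroup.BackendType
_BUILTIN_NAMES = {
    _BT.GLOO: "gloo",
    _BT.NCCL: "nccl",
    _BT.UCC: "ucc",
    _BT.MPI: "mpi",
    _BT.UNDEFINED: "undefined",
}

def backend_type_to_string(bt) -> str:
    # Anything that is not a built-in backend type now stringifies to
    # "custom" instead of falling through to "undefined".
    return _BUILTIN_NAMES.get(bt, "custom")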
8 changes: 8 additions & 0 deletions torch/csrc/distributed/c10d/init.cpp
@@ -2130,6 +2130,14 @@ communication mechanism.
},
py::arg("device"),
py::call_guard<py::gil_scoped_release>())
.def(
"_set_default_backend",
[](const c10::intrusive_ptr<::c10d::ProcessGroup>& self,
const ::c10d::ProcessGroup::BackendType& backendType) {
return self->setDefaultBackend(backendType);
},
py::arg("backend_type"),
py::call_guard<py::gil_scoped_release>())
.def(
"_register_on_completion_hook",
[](const c10::intrusive_ptr<::c10d::ProcessGroup>& self,
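A sketch of the new private binding in use (continuing the single-rank gloo example above; this mirrors the call the PR adds to _new_process_group_helper):

from torch.distributed import ProcessGroup
from torch.distributed import distributed_c10d as c10d

pg = c10d._get_default_group()
# Sets the PG's default backend type; pg.name() follows it from now on.
pg._set_default_backend(ProcessGroup.BackendType.GLOO)
assert pg.name() == "gloo"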
8 changes: 6 additions & 2 deletions torch/distributed/distributed_c10d.py
@@ -145,7 +145,6 @@ def _export_c_types() -> None:
AllToAllOptions,
BarrierOptions,
BroadcastOptions,
C10dBackend,
GatherOptions,
PrefixStore,
ProcessGroup,
@@ -269,6 +268,7 @@ class Backend(str):
GLOO: ProcessGroup.BackendType.GLOO,
NCCL: ProcessGroup.BackendType.NCCL,
UCC: ProcessGroup.BackendType.UCC,
MPI: ProcessGroup.BackendType.MPI,
}

def __new__(cls, name: str):
@@ -1714,6 +1714,8 @@ def _new_process_group_helper(
group_rank,
group_size,
)
assert backend in Backend.backend_type_map, f"Unknown backend type {backend}"
pg._set_default_backend(Backend.backend_type_map[backend])
Member commented:

If backend isn't provided (e.g. backend=None), then the backend type will be undefined, right? Will this break the APIs that depend on looking at the backend type, like getSequenceNumber or the hooks that we talked about?

I'm not sure if we have tests that cover these cases, but it could be worthwhile to check.

Contributor (author) replied:

The backend is not optional for this function. Do we see a use case where backend=None? This logic will break when the backend is something like "cpu:gloo,gpu:nccl".
if device_id:
pg.bound_device_id = device_id
backend_config = BackendConfig(backend)
@@ -4451,7 +4453,9 @@ def split_group(
group_rank,
len(my_group),
)
backend_type = ProcessGroup.BackendType.NCCL
pg.bound_device_id = device_id
pg._set_default_backend(backend_type)

pg_options._timeout = timeout
pg_options.split_from = parent_backend
@@ -4461,7 +4465,6 @@
backend_class = ProcessGroupNCCL(
prefix_store, group_rank, len(my_group), pg_options
)
backend_type = ProcessGroup.BackendType.NCCL
backend_class._set_sequence_number_for_group()

pg._register_backend(torch.device("cuda"), backend_type, backend_class)
@@ -4608,6 +4611,7 @@ def _new_group_with_tag(
if not backend:
backend = default_backend
backend = Backend(backend)

# this timeout defaulting/validation is used for all the new_groups/new_subgroups variants,
# which may just pass their timeout value (or None)