try to fix failure · pytorch/pytorch@dd99ef2 · GitHub

Commit dd99ef2

try to fix failure
1 parent 580a675 commit dd99ef2

File tree

torch/csrc/cuda/nccl.h
torch/csrc/cuda/python_nccl.cpp
torch/csrc/distributed/c10d/NCCLUtils.hpp
torch/csrc/distributed/c10d/quantization/quantization_gpu.cu

4 files changed: +27 -23 lines changed

torch/csrc/cuda/nccl.h

+2
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <nccl.h>
+
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>

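Note (not part of the commit): torch/csrc/cuda/nccl.h declares PyTorch's NCCL wrappers under torch::cuda::nccl, and the new #include <nccl.h> presumably makes the raw NCCL declarations (ncclUniqueId, ncclComm_t, NCCL_UNIQUE_ID_BYTES, ...) visible to every file that includes this header. A minimal standalone sketch of what <nccl.h> itself provides, assuming NCCL is installed and linked (illustrative only, not PyTorch code):

// Illustrative only: a tiny translation unit using the declarations
// that <nccl.h> provides. Build roughly with: g++ demo.cpp -lnccl
#include <nccl.h>
#include <cstdio>

int main() {
  ncclUniqueId id;                             // type defined by nccl.h
  if (ncclGetUniqueId(&id) == ncclSuccess) {   // real NCCL entry point
    std::printf("got a %d-byte NCCL unique id\n", NCCL_UNIQUE_ID_BYTES);
  }
  return 0;
}
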
torch/csrc/cuda/python_nccl.cpp

+23 -22

@@ -15,43 +15,44 @@
 
 using namespace at;
 using namespace torch;
-using namespace torch::cuda::nccl;
-using namespace torch::cuda::nccl::detail;
+using namespace torch::cuda;
+
+namespace pynccl = torch::cuda::nccl;
 
 static const char* COMM_CAPSULE_NAME = "torch.cuda.nccl.Communicator";
 
 PyObject* THCPModule_nccl_version(PyObject* self, PyObject* args) {
-  return PyLong_FromUnsignedLongLong(version());
+  return PyLong_FromUnsignedLongLong(pynccl::version());
 }
 
 PyObject* THCPModule_nccl_version_suffix(PyObject* self, PyObject* args) {
   HANDLE_TH_ERRORS
-  return PyBytes_FromString(version_suffix());
+  return PyBytes_FromString(pynccl::version_suffix());
   END_HANDLE_TH_ERRORS
 }
 
 PyObject* THCPModule_nccl_unique_id(PyObject* self, PyObject* args) {
   HANDLE_TH_ERRORS
-  ncclUniqueId id;
+  pynccl::ncclUniqueId id;
   get_unique_id(id);
   return PyBytes_FromStringAndSize((char*)&id, NCCL_UNIQUE_ID_BYTES);
   END_HANDLE_TH_ERRORS
 }
 
-static ncclComm_t unpack_nccl_comm(PyObject* capsule) {
-  ncclComm_t comm =
-      (ncclComm_t)PyCapsule_GetPointer(capsule, COMM_CAPSULE_NAME);
+static pynccl::ncclComm_t unpack_nccl_comm(PyObject* capsule) {
+  pynccl::ncclComm_t comm =
+      (pynccl::ncclComm_t)PyCapsule_GetPointer(capsule, COMM_CAPSULE_NAME);
   if (!comm)
     throw python_error();
   return comm;
 }
 
 static void destroy_nccl_comm(PyObject* capsule) {
   HANDLE_TH_ERRORS
-  ncclComm_t comm = unpack_nccl_comm(capsule);
+  pynccl::ncclComm_t comm = unpack_nccl_comm(capsule);
   {
     pybind11::gil_scoped_release no_gil;
-    comm_destroy(comm);
+    pynccl::comm_destroy(comm);
   }
   END_HANDLE_TH_ERRORS_RET()
 }

@@ -73,19 +74,19 @@ static std::vector<std::optional<at::cuda::CUDAStream>> unpack_streams(
 static at::Tensor extract_tensor(PyObject* obj);
 static std::vector<at::Tensor> extract_tensors(PyObject* obj);
 
-static std::vector<ncclComm_t> unpack_comms(PyObject* obj, size_t size) {
+static std::vector<pynccl::ncclComm_t> unpack_comms(PyObject* obj, size_t size) {
   if (obj == Py_None) {
-    return std::vector<ncclComm_t>();
+    return std::vector<pynccl::ncclComm_t>();
   }
-  std::vector<ncclComm_t> comms;
+  std::vector<pynccl::ncclComm_t> comms;
   if (PyCapsule_CheckExact(obj)) {
     comms = {unpack_nccl_comm(obj)};
   } else {
     auto seq = THPObjectPtr(PySequence_Fast(obj, "comm is not a sequence"));
     if (!seq)
       throw python_error();
     auto size = PySequence_Fast_GET_SIZE(seq.get());
-    comms = std::vector<ncclComm_t>(size);
+    comms = std::vector<pynccl::ncclComm_t>(size);
     for (const auto i : c10::irange(size)) {
       comms[i] = unpack_nccl_comm(PySequence_Fast_GET_ITEM(seq.get(), i));
     }

@@ -116,12 +117,12 @@ PyObject* THCPModule_nccl_init_rank(PyObject* self, PyObject* args) {
       id_len,
       ")");
 
-  ncclUniqueId commId;
+  pynccl::ncclUniqueId commId;
   memcpy(&commId, id, NCCL_UNIQUE_ID_BYTES);
-  ncclComm_t comm = nullptr;
+  pynccl::ncclComm_t comm = nullptr;
   {
     pybind11::gil_scoped_release no_gil;
-    comm = comm_init_rank(nranks, commId, rank);
+    comm = pynccl::comm_init_rank(nranks, commId, rank);
   }
   return PyCapsule_New(comm, COMM_CAPSULE_NAME, &destroy_nccl_comm);
   END_HANDLE_TH_ERRORS

@@ -153,7 +154,7 @@ PyObject* THCPModule_nccl_reduce(PyObject* self, PyObject* args) {
 
   {
     pybind11::gil_scoped_release no_gil;
-    torch::cuda::nccl::reduce(inputs, output, root, op, streams, user_comms);
+    pynccl::reduce(inputs, output, root, op, streams, user_comms);
   }
 
   Py_RETURN_NONE;

@@ -186,7 +187,7 @@ PyObject* THCPModule_nccl_all_reduce(PyObject* self, PyObject* args) {
 
   {
     pybind11::gil_scoped_release no_gil;
-    all_reduce(inputs, outputs, op, streams, user_comms);
+    pynccl::all_reduce(inputs, outputs, op, streams, user_comms);
   }
 
   Py_RETURN_NONE;

@@ -217,7 +218,7 @@ PyObject* THCPModule_nccl_broadcast(PyObject* self, PyObject* args) {
 
   {
     pybind11::gil_scoped_release no_gil;
-    torch::cuda::nccl::broadcast(inputs, streams, user_comms);
+    pynccl::broadcast(inputs, streams, user_comms);
   }
 
   Py_RETURN_NONE;

@@ -249,7 +250,7 @@ PyObject* THCPModule_nccl_all_gather(PyObject* self, PyObject* args) {
 
   {
     pybind11::gil_scoped_release no_gil;
-    all_gather(inputs, outputs, streams, user_comms);
+    pynccl::all_gather(inputs, outputs, streams, user_comms);
   }
 
   Py_RETURN_NONE;

@@ -282,7 +283,7 @@ PyObject* THCPModule_nccl_reduce_scatter(PyObject* self, PyObject* args) {
 
   {
     pybind11::gil_scoped_release no_gil;
-    reduce_scatter(inputs, outputs, op, streams, user_comms);
+    pynccl::reduce_scatter(inputs, outputs, op, streams, user_comms);
  }
 
   Py_RETURN_NONE;

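Note (not part of the commit): the bulk of this file's change swaps the blanket using-directives for a namespace alias, namespace pynccl = torch::cuda::nccl;, plus explicit qualification. Presumably this avoids ambiguity now that <nccl.h> is pulled in through torch/csrc/cuda/nccl.h, since names such as ncclUniqueId and ncclComm_t then exist both in the global namespace and in torch::cuda::nccl. A self-contained sketch of the pattern, with hypothetical stand-in types and a hypothetical version() function (none of this is PyTorch code):

#include <cstdio>

// Stand-in for the global-namespace type that <nccl.h> would provide.
struct ncclUniqueId { char internal[128]; };

namespace torch { namespace cuda { namespace nccl {
// Stand-in for the wrapper's own type of the same name, plus a wrapper function.
struct ncclUniqueId { char internal[128]; };
inline unsigned long long version() { return 21800; }  // hypothetical value
}}} // namespace torch::cuda::nccl

// Instead of `using namespace torch::cuda::nccl;` (which would make an
// unqualified `ncclUniqueId` ambiguous with the global one), keep an alias
// and qualify each use.
namespace pynccl = torch::cuda::nccl;

int main() {
  pynccl::ncclUniqueId wrapped_id{};  // unambiguously the wrapper's type
  ::ncclUniqueId raw_id{};            // unambiguously the global type
  std::printf("wrapper version %llu, sizes %zu and %zu\n",
              pynccl::version(), sizeof(wrapped_id), sizeof(raw_id));
  return 0;
}
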
torch/csrc/distributed/c10d/NCCLUtils.hpp

+1 -1

@@ -12,7 +12,7 @@
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAEvent.h>
 #include <c10/util/Exception.h>
-#include <nccl.h>
+#include <torch/csrc/cuda/nccl.h>
 #include <torch/csrc/distributed/c10d/TraceUtils.h>
 #include <optional>
 

torch/csrc/distributed/c10d/quantization/quantization_gpu.cu

+1
@@ -1,4 +1,5 @@
 #include <c10/cuda/CUDAGuard.h>
+#include <torch/csrc/cuda/nccl.h>
 #include <torch/csrc/distributed/c10d/Utils.hpp>
 #include <torch/csrc/distributed/c10d/quantization/quantization_gpu.h>
 #include <torch/csrc/distributed/c10d/quantization/quantization_utils.h>

0 commit comments
