Update on "[inductor] parallel compile: Create new pipes for subproc … · pytorch/pytorch@7984abb · GitHub

Commit 7984abb

Update on "[inductor] parallel compile: Create new pipes for subproc communication"
Summary: Rather than using stdin/stdout for IPC, we can create new pipes and pass the descriptors to the subproc via the cmd line. #131070 reports an issue where the combination of deepspeed and onnxruntime-training causes _something_ in the subproc to write to stdout and corrupt the IPC. The current implementation was already brittle; we can just create new pipes specifically for the IPC.

Test Plan: I was able to repro the MemoryError in #131070 by installing deepspeed and onnxruntime-training. Verified this PR fixes it.

cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx ipiszy yf225 chenyang78 kadeng muchulee8 ColinPeppler amjames desertfire chauhang

[ghstack-poisoned]
2 parents 731f637 + 4ba2935 commit 7984abb
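The idea in the summary, sketched minimally below (a POSIX-flavoured illustration with hypothetical names such as worker.py and spawn_worker; it is not the PR's actual torch._inductor code): the parent creates dedicated pipes, passes the raw descriptor numbers on the child's command line, and leaves stdin/stdout alone so a stray print from a third-party library cannot corrupt the IPC stream.

import os
import pickle
import struct
import subprocess
import sys

def spawn_worker():
    # Two fresh pipes dedicated to IPC: requests flow parent -> child,
    # responses flow child -> parent. stdin/stdout are not used at all.
    req_r, req_w = os.pipe()
    resp_r, resp_w = os.pipe()
    proc = subprocess.Popen(
        [sys.executable, "worker.py", str(req_r), str(resp_w)],
        pass_fds=(req_r, resp_w),  # keep only the child's ends open in the child
    )
    # Close the child's ends in the parent so EOF propagates when either side exits.
    os.close(req_r)
    os.close(resp_w)
    return proc, os.fdopen(req_w, "wb"), os.fdopen(resp_r, "rb")

def send(pipe, obj):
    # Length-prefixed pickle frames (one possible framing, assumed here).
    payload = pickle.dumps(obj)
    pipe.write(struct.pack("<I", len(payload)) + payload)
    pipe.flush()

# worker.py would mirror this: reopen int(sys.argv[1]) / int(sys.argv[2]) with
# os.fdopen() and exchange the same frames, never touching sys.stdout.

Whatever framing and serialization the real implementation uses, the point is only that the descriptors carrying the protocol are private to it, so stdout stays free for anything deepspeed or onnxruntime-training happens to print.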

File tree

270 files changed · +2992 −2569 lines changed


.github/ci_commit_pins/xla.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-6f0b61e5d782913a0fc7743812f2a8e522189111
+9952e3861461d45084694c72a3eb2ff2156ed5d9

CODEOWNERS

Lines changed: 9 additions & 9 deletions
@@ -156,12 +156,12 @@ caffe2/utils/hip @jeffdaily @jithunnair-amd
 /torch/csrc/jit/python/init.cpp @mikaylagawarecki

 # CUDA and CUDA math libraries
-aten/src/ATen/cuda/ @eqy
-aten/src/ATen/cudnn/ @eqy
-aten/src/ATen/native/cuda/ @eqy
-aten/src/ATen/native/cudnn/ @eqy
-c10/cuda @eqy
-torch/cuda/ @eqy
-torch/csrc/cuda/ @eqy
-torch/backends/cuda/ @eqy
-torch/backends/cudnn/ @eqy
+aten/src/ATen/cuda/ @eqy @syed-ahmed
+aten/src/ATen/cudnn/ @eqy @syed-ahmed
+aten/src/ATen/native/cuda/ @eqy @syed-ahmed
+aten/src/ATen/native/cudnn/ @eqy @syed-ahmed
+c10/cuda @eqy @syed-ahmed
+torch/cuda/ @eqy @syed-ahmed
+torch/csrc/cuda/ @eqy @syed-ahmed
+torch/backends/cuda/ @eqy @syed-ahmed
+torch/backends/cudnn/ @eqy @syed-ahmed

aten/src/ATen/Context.h

Lines changed: 21 additions & 21 deletions
@@ -416,73 +416,73 @@ class TORCH_API Context {

 TORCH_API Context& globalContext();

-static inline void init() {
+inline void init() {
   globalContext();
 }

 TORCH_API Allocator* getCPUAllocator();

-static inline DeprecatedTypeProperties& getDeprecatedTypeProperties(
+inline DeprecatedTypeProperties& getDeprecatedTypeProperties(
     Backend p,
     ScalarType s) {
   return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
       p, s);
 }

-static inline DeprecatedTypeProperties& CPU(ScalarType s) {
+inline DeprecatedTypeProperties& CPU(ScalarType s) {
   return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
       Backend::CPU, s);
 }

-static inline DeprecatedTypeProperties& CUDA(ScalarType s) {
+inline DeprecatedTypeProperties& CUDA(ScalarType s) {
   return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
       Backend::CUDA, s);
 }

-static inline DeprecatedTypeProperties& HIP(ScalarType s) {
+inline DeprecatedTypeProperties& HIP(ScalarType s) {
   return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
       Backend::HIP, s);
 }

-static inline DeprecatedTypeProperties& MPS(ScalarType s) {
+inline DeprecatedTypeProperties& MPS(ScalarType s) {
   return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
       Backend::MPS, s);
 }

-static inline bool hasCUDA() {
+inline bool hasCUDA() {
   return globalContext().hasCUDA();
 }

-static inline bool hasMTIA() {
+inline bool hasMTIA() {
   return globalContext().hasMTIA();
 }

-static inline bool hasHIP() {
+inline bool hasHIP() {
   return globalContext().hasHIP();
 }

-static inline bool hasIPU() {
+inline bool hasIPU() {
   return globalContext().hasIPU();
 }

-static inline bool hasXLA() {
+inline bool hasXLA() {
   return globalContext().hasXLA();
 }

-static inline bool hasMPS() {
+inline bool hasMPS() {
   return globalContext().hasMPS();
 }

-static inline bool hasMAIA() {
+inline bool hasMAIA() {
   return globalContext().hasMAIA();
 }

-static inline bool hasXPU() {
+inline bool hasXPU() {
   return globalContext().hasXPU();
 }

 // Despite its name, this function returns the number of *CUDA* GPUs.
-static inline size_t getNumGPUs() {
+inline size_t getNumGPUs() {
   // WARNING: DO NOT ADD LOGIC TO HANDLE OTHER DEVICE TYPES TO THIS
   // FUNCTION. If you are interested in interrogating the number of
   // devices for a specific device type, add that function to the
@@ -501,27 +501,27 @@ static inline size_t getNumGPUs() {
   }
 }

-static inline bool hasOpenMP() {
+inline bool hasOpenMP() {
   return globalContext().hasOpenMP();
 }

-static inline bool hasMKL() {
+inline bool hasMKL() {
   return globalContext().hasMKL();
 }

-static inline bool hasLAPACK() {
+inline bool hasLAPACK() {
   return globalContext().hasLAPACK();
 }

-static inline bool hasMAGMA() {
+inline bool hasMAGMA() {
   return globalContext().hasMAGMA();
 }

-static inline bool hasMKLDNN() {
+inline bool hasMKLDNN() {
   return globalContext().hasMKLDNN();
 }

-static inline void manual_seed(uint64_t seed) {
+inline void manual_seed(uint64_t seed) {
   auto gen = globalContext().defaultGenerator(c10::DeviceType::CPU);
   {
     // See Note [Acquire lock when using random generators]

aten/src/ATen/ExpandUtils.h

Lines changed: 2 additions & 2 deletions
@@ -499,7 +499,7 @@ inline Tensor sum_to(
   return _sum_to(std::move(tensor), shape, always_return_non_view);
 }

-static inline bool is_expandable_to(
+inline bool is_expandable_to(
     SymIntArrayRef shape,
     c10::SymIntArrayRef desired) {
   size_t ndim = shape.size();
@@ -517,7 +517,7 @@ static inline bool is_expandable_to(
   return true;
 }

-static inline bool is_expandable_to(IntArrayRef shape, IntArrayRef desired) {
+inline bool is_expandable_to(IntArrayRef shape, IntArrayRef desired) {
   auto sym_shape = c10::SymIntArrayRef(
       reinterpret_cast<const c10::SymInt*>(shape.data()), shape.size());
   auto sym_desired = c10::SymIntArrayRef(

aten/src/ATen/TensorOperators.h

Lines changed: 9 additions & 9 deletions
@@ -33,15 +33,15 @@ namespace at {
   _(==, x.eq(y), y.eq(x)) \
   _(!=, x.ne(y), y.ne(x))

-#define DEFINE_OPERATOR(op, body, reverse_scalar_body) \
-  static inline Tensor operator op(const Tensor& x, const Tensor& y) { \
-    return body; \
-  } \
-  static inline Tensor operator op(const Tensor& x, const Scalar& y) { \
-    return body; \
-  } \
-  static inline Tensor operator op(const Scalar& x, const Tensor& y) { \
-    return reverse_scalar_body; \
+#define DEFINE_OPERATOR(op, body, reverse_scalar_body) \
+  inline Tensor operator op(const Tensor& x, const Tensor& y) { \
+    return body; \
+  } \
+  inline Tensor operator op(const Tensor& x, const Scalar& y) { \
+    return body; \
+  } \
+  inline Tensor operator op(const Scalar& x, const Tensor& y) { \
+    return reverse_scalar_body; \
   }

 AT_FORALL_BINARY_OPS(DEFINE_OPERATOR)

aten/src/ATen/TracerMode.h

Lines changed: 2 additions & 2 deletions
@@ -113,12 +113,12 @@

 namespace at::tracer::impl {

-static inline bool is_dispatch_enabled() {
+inline bool is_dispatch_enabled() {
   return c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Tracer) &&
       !c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Tracer);
 }

-static inline void set_dispatch_enabled(bool enabled) {
+inline void set_dispatch_enabled(bool enabled) {
   TORCH_INTERNAL_ASSERT(
       !c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Tracer),
       "Cannot enable tracing within the scope of NoTracerDispatchMode!");

aten/src/ATen/Utils.h

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ TORCH_API int _crash_if_asan(int);
 // Converts a TensorList (i.e. ArrayRef<Tensor> to vector of TensorImpl*)
 // NB: This is ONLY used by legacy TH bindings, and ONLY used by cat.
 // Once cat is ported entirely to ATen this can be deleted!
-static inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(
+inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(
     ArrayRef<Tensor> tensors,
     const char* name,
     int pos,

aten/src/ATen/WrapDimUtilsMulti.h

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ namespace at {

 constexpr size_t dim_bitset_size = 64;

-static inline std::bitset<dim_bitset_size> dim_list_to_bitset(
+inline std::bitset<dim_bitset_size> dim_list_to_bitset(
     OptionalIntArrayRef opt_dims,
     size_t ndims) {
   TORCH_CHECK(

aten/src/ATen/core/Formatting.h

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ TORCH_API std::ostream& print(
     std::ostream& stream,
     const Tensor& tensor,
     int64_t linesize);
-static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
+inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
   return print(out,t,80);
 }
 TORCH_API void print(const Tensor & t, int64_t linesize=80);

aten/src/ATen/core/boxing/impl/boxing.h

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ torch::jit::Stack boxArgs(Args... args) {
 }

 template <class T>
-static inline constexpr size_t boxed_size_one() {
+inline constexpr size_t boxed_size_one() {
   static_assert(!std::is_same<std::decay_t<T>, c10::TensorOptions>::value, "need to patch this path to support TensorOptions passed by reference");
   return 1;
 }
