[caffe2] Export operators to c10 without including ATen/Tensor.h · pytorch/pytorch@90b0de7 · GitHub
Commit 90b0de7
[caffe2] Export operators to c10 without including ATen/Tensor.h
With this change, the only caffe2 files that depend on `ATen/Tensor.h` are ones that directly use the ATen API. Specifically,

```
[
  "caffe2/CMakeFiles/torch_cuda_cpp.dir/contrib/aten/aten_op_gpu.cc.o",
  "caffe2/CMakeFiles/torch_cpu.dir/core/tensor.cc.o",
  "caffe2/CMakeFiles/torch_cuda_cpp.dir/operators/layer_norm_op.cu.o",
  "caffe2/CMakeFiles/torch_cpu.dir/core/IValueInterface.cc.o",
  "caffe2/CMakeFiles/cuda_tensor_interop_test.dir/__/aten/src/ATen/test/cuda_tensor_interop_test.cpp.o",
  "caffe2/CMakeFiles/torch_cpu.dir/contrib/aten/aten_op.cc.o",
  "caffe2/CMakeFiles/caffe2_pybind11_state_gpu.dir/python/pybind_state.cc.o",
  "caffe2/CMakeFiles/torch_cpu.dir/operators/layer_norm_op.cc.o",
  "caffe2/CMakeFiles/torch_cpu.dir/core/export_c10_op_to_caffe2.cc.o",
  "caffe2/CMakeFiles/torch_cpu.dir/core/export_caffe2_op_to_c10.cc.o",
  "caffe2/CMakeFiles/torch_cpu.dir/operators/enforce_finite_op.cc.o",
  "caffe2/CMakeFiles/torch_cpu.dir/core/operator.cc.o",
  "caffe2/CMakeFiles/tensor_interop_test.dir/__/aten/src/ATen/test/tensor_interop_test.cpp.o",
  "caffe2/CMakeFiles/caffe2_pybind11_state.dir/python/pybind_state.cc.o"
]
```

ghstack-source-id: f183da6
Pull Request resolved: #67096
1 parent 59b472f commit 90b0de7
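For context, here is a minimal sketch of how an operator is exported through `caffe2/core/export_caffe2_op_to_c10.h`; the operator name `MyCopy`, its schema, and `MyCopyOp` are hypothetical and only the `C10_EXPORT_CAFFE2_OP_TO_C10_*` macros come from the real header. The point of this commit is that including that header no longer drags in `ATen/Tensor.h`; the new `export_caffe2_op_to_c10.cc` even includes its own header under `TORCH_ASSERT_NO_OPERATORS` to enforce that.

```cpp
// Illustrative only: "MyCopy" and its schema are made up for this sketch.
#include <caffe2/core/export_caffe2_op_to_c10.h>
#include "my_copy_op.h"  // assumed to define caffe2::MyCopyOp<Context>

// In a header shared by the CPU and CUDA translation units:
// declare the c10 export for the operator.
C10_EXPORT_CAFFE2_OP_TO_C10_DECLARE(MyCopy)

// In the CPU translation unit: register the schema and the CPU kernel
// under the "_caffe2" namespace.
C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    MyCopy,
    "_caffe2::MyCopy(Tensor input) -> Tensor",
    caffe2::MyCopyOp<caffe2::CPUContext>)
```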

File tree

4 files changed: +211 -224 lines


BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -511,6 +511,7 @@ filegroup(
         "caffe2/core/db.cc",
         "caffe2/core/event.cc",
         "caffe2/core/export_c10_op_to_caffe2.cc",
+        "caffe2/core/export_caffe2_op_to_c10.cc",
         "caffe2/core/graph.cc",
         "caffe2/core/init.cc",
         "caffe2/core/init_denormals.cc",
caffe2/core/export_caffe2_op_to_c10.cc

Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
+#define TORCH_ASSERT_NO_OPERATORS
+#include <caffe2/core/export_caffe2_op_to_c10.h>
+#undef TORCH_ASSERT_NO_OPERATORS
+
+#if defined(EXPOSE_C2_OPS) || \
+    !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
+
+#include <ATen/core/function_schema.h>
+#include <ATen/core/dispatch/Dispatcher.h>
+#include <torch/csrc/jit/frontend/function_schema_parser.h>
+#include <torch/library.h>
+
+namespace caffe2 {
+namespace detail {
+
+// We only manipulate the stack inside of this function, so the header
+// does not depend on Tensor or IValue at all.
+void call_caffe2_op_from_c10(
+    const OperatorHandle &opHandle,
+    c10::Stack* stack,
+    _CallCaffe2OpFunc* call_op) {
+  // precondition: on the stack, there's one IValue for each argument of the
+  // c10 schema. The last argument is an optional tensor list that
+  // (if not ivalue::None) contains a preallocated output tensor for each
+  // operator output.
+
+  // As an invariant, we don't want any autograd gradients to be tracked in
+  // Caffe2 operators.
+  at::NoGradGuard guard;
+
+  const auto &schema = opHandle.schema();
+  AT_ASSERT(
+      schema.arguments().size() != 0 &&
+      schema.arguments().back().type()->isSubtypeOf(
+          *OptionalType::create(ListType::ofTensors())));
+  IValue preallocated_outputs = torch::jit::pop(*stack);
+
+  c10::List<at::Tensor> outputs;
+  if (preallocated_outputs.isNone()) {
+    // either the schema doesn't support preallocated outputs or it does but
+    // they haven't been passed in. Pass a list of uninitialized tensors to
+    // the caffe2 operator as preallocated outputs.
+    outputs.resize(schema.returns().size());
+  } else {
+    AT_ASSERT(preallocated_outputs.isTensorList());
+    outputs = std::move(preallocated_outputs).toTensorList();
+  }
+
+  // -1 because the last argument is the list of preallocated tensors
+  const size_t num_inputs = schema.arguments().size() - 1;
+
+  // TODO Avoid vector allocation. One idea would be to keep the std::vector
+  // instances in the cache.
+  std::vector<IValue> inputs = torch::jit::pop(*stack, num_inputs);
+
+  // Convert outputs to caffe2::Tensor
+  const size_t num_outputs = outputs.size();
+  std::vector<caffe2::Tensor> outputs_c2(num_outputs);
+  for (auto i : c10::irange(num_outputs)) {
+    outputs_c2[i] = caffe2::Tensor(outputs.extract(i));
+  }
+
+  outputs_c2 = (*call_op)(schema, inputs, std::move(outputs_c2));
+  TORCH_INTERNAL_ASSERT(num_outputs == outputs_c2.size());
+
+
+  bool return_tensor_list = false;
+  if (schema.returns().size() == 1) {
+    auto type = schema.returns()[0].type();
+    if (c10::ListTypePtr list_type = type->cast<c10::ListType>()) {
+      if (list_type->getElementType()->kind() == c10::TypeKind::TensorType) {
+        return_tensor_list = true;
+      }
+    }
+  }
+  if (return_tensor_list) {
+    for (const auto i : c10::irange(num_outputs)) {
+      outputs.set(i, at::Tensor(std::move(outputs_c2[i])));
+    }
+    torch::jit::push(*stack, outputs);
+  } else {
+    for (const auto i : c10::irange(num_outputs)) {
+      torch::jit::push(*stack, at::Tensor(std::move(outputs_c2[i])));
+    }
+  }
+
+  // postcondition: All inputs are cleared from the stack, there's now one
+  // IValue for each output which holds the result. This
+  // might reuse one of the preallocated tensors but doesn't have
+  // to.
+}
+
+static FunctionSchema make_function_schema_for_c10(
+    const char* schema_str,
+    c10::optional<c10::AliasAnalysisKind> optional_alias_analysis_kind) {
+#if !defined(EXPOSE_C2_OPS) && \
+    (defined(CAFFE2_IS_XPLAT_BUILD) || defined(C10_MOBILE))
+  throw std::logic_error(
+      "We don't support registering c10 ops on mobile yet because the function schema parser isn't present in the mobile build.");
+#else
+  c10::FunctionSchema parsed_schema = torch::jit::parseSchema(schema_str);
+  std::vector<c10::Argument> arguments = parsed_schema.arguments();
+  arguments.emplace_back(
+      PREALLOCATED_OUTPUT_ARGNAME,
+      c10::OptionalType::create(c10::ListType::ofTensors()),
+      nullopt,
+      IValue());
+
+  auto schema = FunctionSchema(
+      parsed_schema.name(),
+      parsed_schema.overload_name(),
+      std::move(arguments),
+      parsed_schema.returns(),
+      parsed_schema.is_vararg(),
+      parsed_schema.is_varret());
+  if (optional_alias_analysis_kind) {
+    schema.setAliasAnalysis(*optional_alias_analysis_kind);
+  }
+  return schema;
+#endif
+}
+
+template <c10::DispatchKey key>
+RegisterDefinition<key>::RegisterDefinition(const char *name, c10::BoxedKernel kernel) {
+  if (c10::impl::dispatch_key_allowlist_check(key)) {
+    static torch::Library m(
+        torch::Library::IMPL, "_caffe2", key, __FILE__, __LINE__);
+    m.impl(name, torch::CppFunction::makeFromBoxedKernel(std::move(kernel)));
+  }
+}
+
+template struct RegisterDefinition<c10::DispatchKey::CPU>;
+template struct RegisterDefinition<c10::DispatchKey::CUDA>;
+template struct RegisterDefinition<c10::DispatchKey::HIP>;
+
+RegisterSchema::RegisterSchema(
+    const char *schema_str,
+    c10::optional<c10::AliasAnalysisKind> optional_alias_analysis_kind) {
+  static torch::Library m(
+      torch::Library::FRAGMENT, "_caffe2", c10::nullopt,
+      __FILE__, __LINE__);
+  m.def(make_function_schema_for_c10(schema_str, optional_alias_analysis_kind));
+}
+
+} // namespace detail
+} // namespace caffe2
+
+#endif
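For reference, a sketch of the caller side of the stack convention documented in `call_caffe2_op_from_c10`: push one IValue per schema argument, push `None` for the appended preallocated-outputs argument, call the boxed kernel through the dispatcher, then pop the results. This is not part of the commit; `_caffe2::MyCopy` is the hypothetical operator from the earlier sketch.

```cpp
#include <ATen/core/Tensor.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/stack.h>

// Calls the hypothetical exported op "_caffe2::MyCopy(Tensor input) -> Tensor".
at::Tensor run_exported_op(const at::Tensor& input) {
  auto op = c10::Dispatcher::singleton().findSchemaOrThrow("_caffe2::MyCopy", "");
  c10::Stack stack;
  torch::jit::push(stack, input);          // one IValue per schema argument
  torch::jit::push(stack, c10::IValue());  // None for the preallocated-outputs list
  op.callBoxed(&stack);                    // dispatches into call_caffe2_op_from_c10
  return torch::jit::pop(stack).toTensor();
}
```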

0 commit comments