Merge pull request #24092 from Aser-Abdelfatah:GSoC_Support_GatherEle… · thewoz/opencv@ca1b8bf · GitHub
[go: up one dir, main page]

Skip to content

Commit ca1b8bf

Browse files
Aser-Abdelfatah authored and thewoz committed
Merge pull request opencv#24092 from Aser-Abdelfatah:GSoC_Support_GatherElements_ONNX
GSoC Add ONNX Support for GatherElements opencv#24092 Merge with: opencv/opencv_extra#1082 Adds support to the ONNX operator GatherElements [operator docs](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GatherElements) Added tests to opencv_extra at pull request opencv/opencv_extra#1082 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
1 parent e86f30a commit ca1b8bf

File tree

8 files changed

+303
-3
lines changed

8 files changed

+303
-3
lines changed

modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,22 @@ CV__DNN_INLINE_NS_BEGIN
343343
static Ptr<GatherLayer> create(const LayerParams& params);
344344
};
345345

346+
/** @brief GatherElements layer
347+
* GatherElements takes two inputs, data and indices, of the same rank r >= 1 and an optional attribute axis, and works such that:
348+
* output[i][j][k] = data[index[i][j][k]][j][k] if axis = 0 and r = 3
349+
* output[i][j][k] = data[i][index[i][j][k]][k] if axis = 1 and r = 3
350+
* output[i][j][k] = data[i][j][index[i][j][k]] if axis = 2 and r = 3
351+
*
352+
* Gather, on the other hand, takes a data tensor of rank r >= 1 and an indices tensor of rank q, and works such that:
353+
* it gathers the entries along the axis dimension of the input data indexed by indices and concatenates them in an output tensor of rank q + (r - 1)
354+
* e.g. if axis = 0, let k = indices[i_{0}, ..., i_{q-1}], then output[i_{0}, ..., i_{q-1}, j_{0}, ..., j_{r-2}] = input[k, j_{0}, ..., j_{r-2}]
355+
**/
356+
class CV_EXPORTS GatherElementsLayer : public Layer
357+
{
358+
public:
359+
/** @brief Creates a GatherElements layer from the given layer parameters. */
static Ptr<GatherElementsLayer> create(const LayerParams& params);
360+
};
361+
346362
class CV_EXPORTS PoolingLayer : public Layer
347363
{
348364
public:

modules/dnn/perf/perf_layer.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,56 @@ PERF_TEST_P_(Layer_LayerNormExpanded, DISABLED_LayerNormExpanded)
633633
test_layer({N, H ,W});
634634
}
635635

636+
// Performance fixture for the GatherElements layer.
// Builds a single-layer network fed by two network inputs ("data" and "indices"),
// runs one warm-up forward pass and then times repeated forward passes.
struct Layer_GatherElements : public TestBaseWithParam<tuple<Backend, Target> >
{
    // data_shape    - shape of the float32 tensor to gather from
    // indices_shape - shape of the index tensor (also determines the output shape)
    // axis          - axis along which elements are gathered; also used to bound the random indices
    void test_layer(const std::vector<int>& data_shape, const std::vector<int>& indices_shape, int axis = 0)
    {
        int backendId = get<0>(GetParam());
        int targetId = get<1>(GetParam());

        Mat data(data_shape, CV_32FC1);
        Mat indices(indices_shape, CV_32FC1);

        randu(data, 0.f, 1.f);
        // Indices are stored as float32 and drawn from [0, data_shape[axis]).
        randu(indices, 0, data_shape[axis]);

        Net net;
        LayerParams lp;
        lp.type = "GatherElements";
        lp.name = "testLayer";
        lp.set("axis", axis);
        int id = net.addLayerToPrev(lp.name, lp.type, lp);
        net.connect(0, 0, id, 0);
        net.connect(0, 1, id, 1);

        // warmup
        {
            // Exactly two network inputs are used; sizing the vector to the number of
            // names avoids registering an extra, empty-named input that is never fed.
            std::vector<String> inpNames(2);
            inpNames[0] = "data";
            inpNames[1] = "indices";
            net.setInputsNames(inpNames);
            net.setInput(data, inpNames[0]);
            net.setInput(indices, inpNames[1]);

            net.setPreferableBackend(backendId);
            net.setPreferableTarget(targetId);
            Mat out = net.forward();
        }

        TEST_CYCLE()
        {
            Mat res = net.forward();
        }

        SANITY_CHECK_NOTHING();
    }
};
680+
681+
// Benchmarks GatherElements along axis 2 with data of shape {2700, 1, 2914}
// and indices of shape {2700, 1, 81}.
PERF_TEST_P_(Layer_GatherElements, GatherElements)
682+
{
683+
test_layer({2700, 1, 2914}, {2700, 1, 81}, 2);
684+
}
685+
636686
INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
637687
INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
638688
#ifdef HAVE_CUDA
@@ -642,6 +692,7 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN
642692
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
643693
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
644694
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
695+
INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
645696

646697

647698
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;

modules/dnn/src/init.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ void initializeLayerFactory()
157157
CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer);
158158
CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer);
159159
CV_DNN_REGISTER_LAYER_CLASS(Gather, GatherLayer);
160+
CV_DNN_REGISTER_LAYER_CLASS(GatherElements, GatherElementsLayer);
160161
CV_DNN_REGISTER_LAYER_CLASS(LayerNormalization, LayerNormLayer);
161162
CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
162163

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
#include "../precomp.hpp"
6+
#include <opencv2/dnn/shape_utils.hpp>
7+
8+
namespace cv { namespace dnn {
9+
10+
static inline int calculateOffset(int outer_dim, const MatShape &shape_indices, int axis_skip, const MatStep &step_data) {
11+
int offset = 0;
12+
for (int axis = static_cast<int>(shape_indices.size()) - 2; axis >= 0; axis--) {
13+
int dim = shape_indices[axis];
14+
if (axis != axis_skip) {
15+
offset += (outer_dim % dim) * step_data[axis];
16+
}
17+
outer_dim /= dim;
18+
}
19+
return offset;
20+
}
21+
22+
// CPU implementation of the GatherElements layer.
//
// Inputs:  [0] data, [1] indices (same rank as data).
// Output:  one tensor with the shape of indices, where every element is picked from
//          data by replacing the coordinate along `axis` with the matching index value.
// All three tensors are read/written with the same element type (see forward()).
class GatherElementsLayerImpl CV_FINAL : public GatherElementsLayer
{
public:
    // Reads the optional integer "axis" parameter (default 0). The value may be negative
    // here; it is normalized against the data rank in finalize().
    GatherElementsLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        axis = params.get<int>("axis", 0);
    }

    // Only the default OpenCV backend is implemented.
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV;
    }

    // Validates the input shapes and reports a single output with the shape of indices.
    // Raises an OpenCV error if there are not exactly two inputs, if the ranks differ,
    // if the axis is out of range, or if any non-axis dimension differs between data and indices.
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_CheckEQ(inputs.size(), 2ull, "GatherElements: requires two inputs");

        const auto &data = inputs[0];
        const auto &indices = inputs[1];
        CV_CheckEQ(data.size(), indices.size(), "GatherElements: data and indices should have the same dimension");

        const int rank = static_cast<int>(data.size());
        const int normalized_axis = normalize_axis(axis, rank);
        CV_CheckGE(normalized_axis, 0, "GatherElements: axis out of range");
        CV_CheckLT(normalized_axis, rank, "GatherElements: axis out of range");
        // Signed loop index: avoids comparing size_t against the signed axis.
        for (int i = 0; i < rank; i++) {
            if (i != normalized_axis) {
                CV_CheckEQ(data[i], indices[i], "GatherElements: shape mismatched");
            }
        }

        outputs.assign(1, inputs[1]); // shape of output is same as indices
        return false;
    }

    // Converts a possibly negative axis into its non-negative form once the data rank is known.
    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);

        const auto &data = inputs[0];
        axis = normalize_axis(axis, data.dims);
    }

    // Runs the gather. Dispatches on the output element type.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        const Mat& data = inputs[0];
        const Mat& indices = inputs[1];
        Mat& out = outputs[0];

        // forward_impl<T> reinterprets all three buffers as T, so a type mismatch would
        // silently read garbage (or read past the end of a narrower buffer).
        CV_CheckTypeEQ(data.type(), out.type(), "GatherElements: data and output must have the same type");
        CV_CheckTypeEQ(indices.type(), out.type(), "GatherElements: indices and output must have the same type");

        typeDispatch(out.type(), data, indices, out);
    }

    // Typed kernel. Each parallel work item handles one innermost row of the indices tensor.
    // NOTE(review): indices_ and out_ are addressed as densely packed buffers
    // (row * inner_most_dim); this presumably relies on the blobs being continuous - confirm.
    template <typename T>
    void forward_impl(const Mat& data_, const Mat& indices_, Mat& out_)
    {
        const auto *ptr_data = data_.ptr<const T>();
        const auto *ptr_indices = indices_.ptr<const T>();
        auto *ptr_out = out_.ptr<T>();

        const auto shape_data = shape(data_);
        const auto &step_data = data_.step;
        const auto shape_indices = shape(indices_);

        const int inner_most_dim = shape_indices.back();
        const int axis_dim = shape_data[axis];
        // Step along the gather axis expressed in elements rather than bytes.
        const size_t axis_step = static_cast<size_t>(step_data[axis] / sizeof(T));

        const bool innermost_axis = axis == static_cast<int>(shape_data.size() - 1);

        auto fn = [&](const Range &r) {
            for (int i = r.start; i < r.end; i++) {
                // Base pointer into data for this row, with the gather-axis coordinate left at 0.
                const auto *data = ptr_data + static_cast<size_t>(calculateOffset(i, shape_indices, axis, step_data) / sizeof(T));
                // size_t arithmetic: row * width may not fit in int for large tensors.
                const size_t row_start = static_cast<size_t>(i) * inner_most_dim;
                const auto *indices = ptr_indices + row_start;
                auto *out = ptr_out + row_start;

                if (innermost_axis) {
                    // Gathering along the innermost axis: the index alone selects the element.
                    for (int j = 0; j < inner_most_dim; j++) {
                        const int index = wrapIndex(indices[j], axis_dim);
                        out[j] = data[index];
                    }
                } else {
                    // Gathering along an outer axis: index picks the slice, j stays the inner coordinate.
                    for (int j = 0; j < inner_most_dim; j++) {
                        const int index = wrapIndex(indices[j], axis_dim);
                        out[j] = data[index * axis_step + j];
                    }
                }
            }
        };

        const int outer_dims = total(shape_indices, 0, static_cast<int>(shape_indices.size()) - 1);
        // Roughly one stripe per 1024 gathered elements; computed in double to avoid int overflow.
        const double nstripes = static_cast<double>(static_cast<size_t>(
            static_cast<double>(outer_dims) * inner_most_dim * (1 / 1024.0)));
        parallel_for_(Range(0, outer_dims), fn, nstripes);
    }

    // Calls forward_impl<T> for the element type matching `type`.
    // Raises cv::Error::BadDepth for any type other than CV_8U, CV_32S or CV_32F.
    template<typename... Args>
    inline void typeDispatch(const int type, Args&&... args)
    {
        switch (type)
        {
            case CV_8U:
                forward_impl<uint8_t>(std::forward<Args>(args)...);
                break;
            case CV_32S:
                forward_impl<int32_t>(std::forward<Args>(args)...);
                break;
            case CV_32F:
                forward_impl<float>(std::forward<Args>(args)...);
                break;
            default:
                CV_Error(cv::Error::BadDepth, "DNN/GatherElements: Unsupported type.");
        }
    }

private:
    // Maps a raw index value into [0, axis_dim).
    // Values in [-axis_dim, axis_dim) follow the usual negative-index convention; anything
    // outside that range is wrapped modulo axis_dim. The final adjustment matters for values
    // below -axis_dim, where the remainder alone is negative and would index before the buffer.
    // TODO: report out-of-range indices instead of wrapping them.
    template <typename T>
    static inline int wrapIndex(T raw_index, int axis_dim)
    {
        const int index = static_cast<int>(raw_index + axis_dim) % axis_dim;
        return index < 0 ? index + axis_dim : index;
    }

    int axis; // gather axis; non-negative after finalize()
};
148+
149+
// Factory for the GatherElements layer: constructs the implementation class
// from the given layer parameters and returns it through the public interface type.
Ptr<GatherElementsLayer> GatherElementsLayer::create(const LayerParams& params)
150+
{
151+
return makePtr<GatherElementsLayerImpl>(params);
152+
}
153+
154+
}} // namespace cv::dnn

modules/dnn/src/onnx/onnx_importer.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ class ONNXImporter
179179
void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
180180
void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
181181
void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
182+
void parseGatherElements (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
182183
void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
183184
void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
184185
void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
@@ -2553,6 +2554,53 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node
25532554
addLayer(layerParams, node_proto);
25542555
}
25552556

2557+
// Imports an ONNX GatherElements node.
//  - Both inputs constant: the layer is evaluated right away and its result is
//    registered as a constant under the node's output name.
//  - Only some inputs constant: each constant input is wrapped into a "Const" layer,
//    then the GatherElements layer itself is added.
//  - No constant inputs: the GatherElements layer is added directly.
void ONNXImporter::parseGatherElements(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckEQ(node_proto.input_size(), 2, "GatherElements: two inputs are required");

    const size_t total_inputs = node_proto.input_size();

    // True when the idx-th input of the node is a known constant blob.
    auto isConstInput = [&](size_t idx) -> bool {
        return constBlobs.find(node_proto.input(idx)) != constBlobs.end();
    };

    // Loads the idx-th constant input; input 1 (indices) is converted to float32.
    auto fetchConstInput = [&](size_t idx) -> Mat {
        Mat blob = getBlob(node_proto, idx);
        if (idx == 1) { // indices, from int32/int64 to float32 for compatibility
            blob.convertTo(blob, CV_32F);
        }
        return blob;
    };

    size_t const_count = 0;
    for (size_t idx = 0; idx < total_inputs; ++idx)
    {
        if (isConstInput(idx))
            ++const_count;
    }

    if (const_count == total_inputs)
    {
        // Everything is known at import time: fold the node into a constant.
        std::vector<Mat> inputs, output;
        for (size_t idx = 0; idx < total_inputs; idx++)
        {
            inputs.push_back(fetchConstInput(idx));
        }
        runLayer(layerParams, inputs, output);
        CV_Assert(output.size() == 1);
        addConstant(node_proto.output(0), output[0]);
        return;
    }

    if (const_count > 0)
    {
        // Mixed case: expose each constant input to the network as a Const layer.
        for (size_t idx = 0; idx < total_inputs; idx++)
        {
            if (!isConstInput(idx))
                continue;

            Mat blob = fetchConstInput(idx);

            LayerParams constParams;
            constParams.name = node_proto.input(idx);
            constParams.type = "Const";
            constParams.blobs.push_back(blob);

            opencv_onnx::NodeProto proto;
            proto.add_output(constParams.name);
            addLayer(constParams, proto);
        }
    }

    addLayer(layerParams, node_proto);
}
2603+
25562604
void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
25572605
{
25582606
bool hasVariableInps = false;
@@ -3901,6 +3949,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
39013949
dispatch["Cast"] = &ONNXImporter::parseCast;
39023950
dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill;
39033951
dispatch["Gather"] = &ONNXImporter::parseGather;
3952+
dispatch["GatherElements"] = &ONNXImporter::parseGatherElements;
39043953
dispatch["Concat"] = &ONNXImporter::parseConcat;
39053954
dispatch["Resize"] = &ONNXImporter::parseResize;
39063955
dispatch["Upsample"] = &ONNXImporter::parseUpsample;

modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@
5555
"test_flatten_negative_axis1",
5656
"test_flatten_negative_axis2",
5757
"test_flatten_negative_axis4",
58+
"test_gather_elements_0",
59+
"test_gather_elements_1",
60+
"test_gather_elements_negative_indices",
5861
"test_logsoftmax_default_axis",
5962
"test_maxpool_2d_dilations",
6063
"test_maxpool_2d_same_lower",

modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,6 @@
115115
"test_gather_0",
116116
"test_gather_1",
117117
"test_gather_2d_indices",
118-
"test_gather_elements_0",
119-
"test_gather_elements_1",
120-
"test_gather_elements_negative_indices",
121118
"test_gather_negative_indices",
122119
"test_gathernd_example_float32",
123120
"test_gathernd_example_int32",

modules/dnn/test/test_onnx_importer.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "test_precomp.hpp"
1010
#include "npy_blob.hpp"
1111
#include <opencv2/dnn/shape_utils.hpp>
12+
#include <numeric>
1213
namespace opencv_test { namespace {
1314

1415
template<typename TString>
@@ -2134,6 +2135,34 @@ TEST_P(Test_ONNX_nets, Alexnet)
21342135
expectNoFallbacksFromIE(net);
21352136
}
21362137

2138+
// Runs the RAFT optical-flow ONNX model on two frames and compares
// output "12007" against a stored reference.
TEST_P(Test_ONNX_nets, RAFT)
{
    applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_DEBUG_VERYLONG, CV_TEST_TAG_MEMORY_2GB);

    const std::string model_file = _tf("models/optical_flow_estimation_raft_2023aug.onnx", false);
    const std::string frame0_file = findDataFile(std::string("gpu/opticalflow/frame0.png"));
    const std::string frame1_file = findDataFile(std::string("gpu/opticalflow/frame1.png"));

    const Size input_size(480, 360);
    Mat frame0 = imread(frame0_file);
    Mat frame1 = imread(frame1_file);
    Mat input0 = blobFromImage(frame0, 1.0, input_size, 0, true);
    Mat input1 = blobFromImage(frame1, 1.0, input_size, 0, true);

    Net net = readNet(model_file);
    net.setInput(input0, "0");
    net.setInput(input1, "1");

    std::vector<std::string> requested_outputs;
    requested_outputs.push_back("12007");
    requested_outputs.push_back("12006");
    std::vector<Mat> results;
    net.forward(results, requested_outputs);

    // Only output 12007 (results[0]) is compared. The reference for output 12006 is
    // larger than 1MB and is left out to save space in opencv_extra; since 12006 is
    // calculated from 12007, checking 12007 is sufficient.
    const std::string reference_file = _tf("data/output_optical_flow_estimation_raft_2023aug.npy");
    Mat reference = blobFromNPY(reference_file);
    normAssert(reference, results[0], "", 1e-5, 1.8e-4);
}
2165+
21372166
TEST_P(Test_ONNX_nets, Squeezenet)
21382167
{
21392168
testONNXModels("squeezenet", pb);

0 commit comments

Comments
 (0)
0