Merge pull request #24378 from fengyuentau:instance_norm · thewoz/opencv@19e9e85


Commit 19e9e85

fengyuentau authored and thewoz committed
Merge pull request opencv#24378 from fengyuentau:instance_norm
dnn onnx: add instance norm layer opencv#24378

Resolves opencv#24377
Relates opencv#24092 (comment)

| Perf | multi-thread | single-thread |
| - | - | - |
| x: [2, 64, 180, 240] | 3.95ms | 11.12ms |

Todo:

- [x] speed up by multi-threading
- [x] add perf
- [x] add backend: OpenVINO
- [x] add backend: CUDA
- [x] add backend: OpenCL (no fp16)
- [ ] add backend: CANN (will be done via opencv#24462)

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is an accuracy test, performance test, and test data in the opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake

```
force_builders=Linux OpenCL,Win64 OpenCL,Custom
buildworker:Custom=linux-4
build_image:Custom=ubuntu:18.04
modules_filter:Custom=none
disable_ipp:Custom=ON
```
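For reference (this formula is from the ONNX specification, not part of this patch): InstanceNormalization normalizes each (sample, channel) slice over its spatial dimensions,

$$y_{n,c,h,w} = \mathrm{scale}_c \cdot \frac{x_{n,c,h,w} - \mu_{n,c}}{\sqrt{\sigma^2_{n,c} + \epsilon}} + \mathrm{bias}_c$$

where $\mu_{n,c}$ and $\sigma^2_{n,c}$ are the mean and variance of the $H \times W$ elements of slice $(n, c)$.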
1 parent e51cc0d commit 19e9e85

File tree: 10 files changed, +454 −43 lines


modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 7 additions & 0 deletions
@@ -1166,6 +1166,13 @@ CV__DNN_INLINE_NS_BEGIN
         static Ptr<ExpandLayer> create(const LayerParams &params);
     };
 
+    class CV_EXPORTS InstanceNormLayer : public Layer {
+    public:
+        float epsilon;
+
+        static Ptr<InstanceNormLayer> create(const LayerParams &params);
+    };
+
 //! @}
 //! @}
 CV__DNN_INLINE_NS_END
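The new public class can also be instantiated directly. A minimal sketch, assuming the layer reads its epsilon from a LayerParams key named "epsilon" (the key name mirrors the ONNX attribute and is an assumption, not shown in this diff):

```cpp
#include <opencv2/dnn/all_layers.hpp>

using namespace cv;
using namespace cv::dnn;

Ptr<InstanceNormLayer> makeInstanceNorm()
{
    LayerParams params;
    params.name = "instnorm1";                 // hypothetical layer name
    params.type = "InstanceNormalization";
    params.set("epsilon", 1e-5f);              // assumed key; mirrors the ONNX attribute
    return InstanceNormLayer::create(params);  // factory declared in all_layers.hpp above
}
```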

modules/dnn/perf/perf_layer.cpp

Lines changed: 57 additions & 0 deletions
@@ -683,6 +683,62 @@ PERF_TEST_P_(Layer_GatherElements, GatherElements)
     test_layer({2700, 1, 2914}, {2700, 1, 81}, 2);
 }
 
+struct Layer_InstanceNorm : public TestBaseWithParam<tuple<Backend, Target> >
+{
+    void test_layer(const std::vector<int>& x_shape)
+    {
+        int backendId = get<0>(GetParam());
+        int targetId = get<1>(GetParam());
+
+        Mat x(x_shape, CV_32FC1);
+        Mat scale(x_shape[1], 1, CV_32FC1);
+        Mat b(x_shape[1], 1, CV_32FC1);
+
+        randu(x, 0.f, 1.f);
+        randu(scale, 0.f, 1.f);
+        randu(b, 0.f, 1.f);
+
+        Net net;
+        LayerParams lp;
+        lp.type = "InstanceNormalization";
+        lp.name = "testLayer";
+        int id = net.addLayerToPrev(lp.name, lp.type, lp);
+        net.connect(0, 0, id, 0);
+        net.connect(0, 1, id, 1);
+        net.connect(0, 2, id, 2);
+
+        // warmup
+        {
+            std::vector<String> inpNames{"x", "scale", "b"};
+            net.setInputsNames(inpNames);
+            net.setInput(x, inpNames[0]);
+            net.setInput(scale, inpNames[1]);
+            net.setInput(b, inpNames[2]);
+
+            net.setPreferableBackend(backendId);
+            net.setPreferableTarget(targetId);
+            Mat out = net.forward();
+        }
+
+        TEST_CYCLE()
+        {
+            Mat res = net.forward();
+        }
+
+        SANITY_CHECK_NOTHING();
+    }
+
+    int N = 2;
+    int C = 64;
+    int H = 180;
+    int W = 240;
+};
+
+PERF_TEST_P_(Layer_InstanceNorm, InstanceNorm)
+{
+    test_layer({N, C, H, W});
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 #ifdef HAVE_CUDA

@@ -693,6 +749,7 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
+INSTANTIATE_TEST_CASE_P(/**/, Layer_InstanceNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 
 
 typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;

modules/dnn/src/cuda/mvn.cu

Lines changed: 28 additions & 0 deletions
@@ -66,6 +66,17 @@ namespace raw {
             output[idx] = (static_cast<float>(input[idx]) - means[outer_idx]) * scale[outer_idx];
         }
     }
+
+    template <class T>
+    __global__ void normalize_mean_variance_channelwise(Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> stdev, size_type inner_size, size_type C) {
+        for (auto idx : grid_stride_range(output.size())) {
+            const index_type outer_idx = idx / inner_size;
+            const index_type c = outer_idx % C;
+            auto s = static_cast<float>(scale[c]) * stdev[outer_idx];
+            auto b = static_cast<float>(bias[c]);
+            output[idx] = (static_cast<float>(input[idx]) - means[outer_idx]) * s + b;
+        }
+    }
 }
 
 template <class T>

@@ -142,4 +153,21 @@ template void normalize_mean_variance(const Stream&, Span<__half>, View<__half>,
 #endif
 template void normalize_mean_variance(const Stream&, Span<float>, View<float>, View<float>, View<float>, std::size_t);
 
+template <class T>
+void normalize_mean_variance_channelwise(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> stdev, std::size_t inner_size, std::size_t C)
+{
+    CV_Assert(input.size() == output.size());
+    CV_Assert(input.size() / inner_size == means.size());
+    CV_Assert(means.size() == stdev.size());
+
+    auto kernel = raw::normalize_mean_variance_channelwise<T>;
+    auto policy = make_policy(kernel, output.size(), 0, stream);
+    launch_kernel(kernel, policy, output, input, scale, bias, means, stdev, inner_size, C);
+}
+
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
+template void normalize_mean_variance_channelwise(const Stream&, Span<__half> /*output*/, View<__half> /*input*/, View<__half> /*scale*/, View<__half> /*bias*/, View<float> /*means*/, View<float> /*stdev*/, std::size_t, std::size_t);
+#endif
+template void normalize_mean_variance_channelwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*stdev*/, std::size_t, std::size_t);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
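To make the kernel's indexing concrete: each contiguous run of inner_size elements shares one (n, c) statistics entry, and the channel index for scale/bias repeats with period C. A plain C++ restatement of the same arithmetic (illustrative sketch; the names are invented, and stdev is assumed to already hold the fused 1/sqrt(var + eps) factor produced upstream):

```cpp
#include <cstddef>
#include <vector>

// CPU restatement of raw::normalize_mean_variance_channelwise's index math.
void normalizeChannelwiseRef(std::vector<float>& out, const std::vector<float>& in,
                             const std::vector<float>& scale, const std::vector<float>& bias,
                             const std::vector<float>& means, const std::vector<float>& stdev,
                             std::size_t inner_size, std::size_t C)
{
    for (std::size_t idx = 0; idx < out.size(); idx++) {
        std::size_t outer_idx = idx / inner_size; // which (n, c) slice this element belongs to
        std::size_t c = outer_idx % C;            // channel index selects scale/bias
        float s = scale[c] * stdev[outer_idx];    // fold the inverse stdev into the scale
        out[idx] = (in[idx] - means[outer_idx]) * s + bias[c];
    }
}
```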

modules/dnn/src/cuda4dnn/kernels/mvn.hpp

Lines changed: 3 additions & 0 deletions
@@ -26,6 +26,9 @@ void normalize_mean(const csl::Stream& stream, csl::Span<T> output, csl::View<T>
 template <class T>
 void normalize_mean_variance(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<float> means, csl::View<float> scale, std::size_t inner_size);
 
+template <class T>
+void normalize_mean_variance_channelwise(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> stdev, std::size_t inner_size, std::size_t C);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
 
 #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_MVN_HPP */
modules/dnn/src/cuda4dnn/primitives/instance_norm.hpp

Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
+#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
+
+#include "../../op_cuda.hpp"
+
+#include "../csl/stream.hpp"
+#include "../csl/span.hpp"
+#include "../csl/tensor.hpp"
+#include "../csl/workspace.hpp"
+
+#include "../kernels/fill_copy.hpp"
+#include "../kernels/mvn.hpp"
+
+#include <opencv2/core.hpp>
+
+#include <cstddef>
+#include <vector>
+#include <utility>
+
+namespace cv { namespace dnn { namespace cuda4dnn {
+
+template <class T>
+class InstanceNormOp final : public CUDABackendNode {
+public:
+    using wrapper_type = GetCUDABackendWrapperType<T>;
+
+    InstanceNormOp(csl::Stream stream_, float epsilon_, size_t loops)
+        : stream(std::move(stream_)), epsilon(epsilon_) {
+        csl::WorkspaceBuilder builder;
+        builder.require<float>(loops);
+        builder.require<float>(loops);
+        scratch_mem_in_bytes = builder.required_workspace_size();
+    }
+
+    void forward(const std::vector<cv::Ptr<BackendWrapper>>& inputs,
+                 const std::vector<cv::Ptr<BackendWrapper>>& outputs,
+                 csl::Workspace& workspace) override {
+        auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
+        auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
+        auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
+
+        auto input = input_wrapper->getView();
+        auto scale = scale_wrapper->getView();
+        auto bias = bias_wrapper->getView();
+
+        auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
+        auto output = output_wrapper->getSpan();
+
+        auto C = input.get_axis_size(1);
+        auto loops = input.size_range(0, 2);
+        auto norm_size = input.size_range(2, input.rank());
+        if (norm_size == 1) {
+            kernels::fill<T>(stream, output, 0.f);
+            return;
+        } else {
+            auto ws_allocator = csl::WorkspaceAllocator(workspace);
+
+            auto mean = ws_allocator.get_span<float>(loops);
+            kernels::fill<float>(stream, mean, 0.f);
+
+            auto stdev = ws_allocator.get_span<float>(loops);
+            kernels::fill<float>(stream, stdev, 0.f);
+
+            kernels::reduce_mean_sqr_sum<T>(stream, mean, stdev, input, norm_size);
+            kernels::compute_normalization_scale(stream, stdev, mean, stdev, norm_size, epsilon);
+            kernels::normalize_mean_variance_channelwise<T>(stream, output, input, scale, bias, mean, stdev, norm_size, C);
+        }
+    }
+
+    std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
+
+private:
+    csl::Stream stream;
+
+    float epsilon;
+
+    std::size_t scratch_mem_in_bytes;
+};
+
+}}} // cv::dnn::cuda4dnn
+
+#endif // OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
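The constructor sizes the scratch space for two float buffers of loops = N·C entries each (one for the means, one reused for the squared sums and then the normalization scales). As a worked example, assuming WorkspaceBuilder adds no alignment padding: for the perf-test shape x = [2, 64, 180, 240], loops = 2 × 64 = 128, so the scratch requirement is 2 × 128 × 4 bytes = 1024 bytes.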

modules/dnn/src/init.cpp

Lines changed: 1 addition & 0 deletions
@@ -160,6 +160,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(GatherElements, GatherElementsLayer);
     CV_DNN_REGISTER_LAYER_CLASS(LayerNormalization, LayerNormLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
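After this registration, the string "InstanceNormalization" resolves to the new layer through the public factory. A minimal sketch of the lookup (not part of this patch):

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/layer.hpp>

// Resolve the newly registered type string to a concrete layer instance.
cv::Ptr<cv::dnn::Layer> resolveInstanceNorm()
{
    cv::dnn::LayerParams params;
    params.type = "InstanceNormalization";  // must match the registered class name above
    return cv::dnn::LayerFactory::createLayerInstance(params.type, params);
}
```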

modules/dnn/src/layers/cpu_kernels/fast_norm.cpp

Lines changed: 4 additions & 3 deletions
@@ -118,10 +118,11 @@ void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output,
 
 void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon) {
     const auto input_shape = shape(input);
+    size_t N = input_shape[0], C = input_shape[1];
     CV_CheckEQ(scale.total(), bias.total(), "fastNormChannel: scale and bias should have the same shape");
+    CV_CheckEQ(scale.total(), C, "fastNormChannel: scale should be a 1d tensor and match the channel of input");
     CV_CheckGE(input.dims, 3, "fastNormChannel: input dimension >= 3");
 
-    size_t N = input_shape[0], C = input_shape[1];
     size_t loops = N * C,
            norm_size = static_cast<size_t>(total(input_shape, 2));
     float inv_norm_size = 1.0 / norm_size;

@@ -147,9 +148,9 @@ void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &o
             float inv_stdev = 1.f / mean_square;
 
             size_t c = i % C;
-            float s = scale_data[c], b = bias_data[c];
+            float s = scale_data[c] * inv_stdev, b = bias_data[c];
             for (size_t j = 0; j < norm_size; j++) {
-                y[j] = s * (x[j] - mean) * inv_stdev + b;
+                y[j] = s * (x[j] - mean) + b;
             }
         }
     };
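The inner-loop change here is an algebraic refactor, not a behavior change: hoisting the inverse standard deviation out of the per-element loop saves one multiply per element, since

$$s \cdot (x_j - \mu) \cdot \sigma^{-1} + b = (s \cdot \sigma^{-1}) \cdot (x_j - \mu) + b,$$

with $s \cdot \sigma^{-1}$ computed once per (n, c) slice as scale_data[c] * inv_stdev. (Floating-point rounding may differ negligibly between the two forms.)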

0 commit comments