Merge pull request #21534 from alexgiving:atrutnev/simd_for_merge4 · opencv/opencv@2efcaa9 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2efcaa9

Browse files
committed
Merge pull request #21534 from alexgiving:atrutnev/simd_for_merge4
2 parents f77c357 + aa53541 commit 2efcaa9

File tree

4 files changed

+50
-10
lines changed

4 files changed

+50
-10
lines changed

modules/gapi/src/backends/fluid/gfluidcore.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2686,16 +2686,8 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
26862686

26872687
int w = 0; // cycle counter
26882688

2689-
#if CV_SIMD128
2690-
for (; w <= width-16; w+=16)
2691-
{
2692-
v_uint8x16 a, b, c, d;
2693-
a = v_load(&in1[w]);
2694-
b = v_load(&in2[w]);
2695-
c = v_load(&in3[w]);
2696-
d = v_load(&in4[w]);
2697-
v_store_interleave(&out[4*w], a, b, c, d);
2698-
}
2689+
#if CV_SIMD
2690+
w = merge4_simd(in1, in2, in3, in4, out, width);
26992691
#endif
27002692

27012693
for (; w < width; w++)

modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,13 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
256256
CV_CPU_DISPATCH_MODES_ALL);
257257
}
258258

259+
// Dispatch entry point for the Merge4 SIMD helper.
// Forwards all six arguments, unchanged and in the same order, to the
// merge4_simd implementation through the CV_CPU_DISPATCH macro, using
// CV_CPU_DISPATCH_MODES_ALL as the list of dispatch modes.
// NOTE(review): there is no explicit return statement here; the macro is
// presumably responsible for returning the callee's int result - confirm
// against the CV_CPU_DISPATCH definition.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
260+
const uchar in4[], uchar out[], const int width)
261+
{
262+
CV_CPU_DISPATCH(merge4_simd, (in1, in2, in3, in4, out, width),
263+
CV_CPU_DISPATCH_MODES_ALL);
264+
}
265+
259266
} // namespace fluid
260267
} // namespace gapi
261268
} // namespace cv

modules/gapi/src/backends/fluid/gfluidcore_func.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
196196
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
197197
uchar out[], const int width);
198198

199+
// Merge4 SIMD helper: combines the four input rows in1..in4 into `out`,
// writing four output bytes per input position, for `width` positions.
// Returns the number of positions it handled so the caller can finish any
// remainder with scalar code.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
200+
const uchar in4[], uchar out[], const int width);
201+
199202
} // namespace fluid
200203
} // namespace gapi
201204
} // namespace cv

modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
217217
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
218218
uchar out[], const int width);
219219

220+
// Per-CPU-target declaration of the Merge4 SIMD helper; the definition
// follows below when CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY is not defined.
// Returns the number of input positions processed with vector code.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
221+
const uchar in4[], uchar out[], const int width);
222+
220223
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
221224

222225
struct scale_tag {};
@@ -2076,6 +2079,41 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
20762079
return x;
20772080
}
20782081

2082+
//-------------------------
2083+
//
2084+
// Fluid kernels: Merge4
2085+
//
2086+
//-------------------------
2087+
2088+
// Vectorized Merge4: loads one vector from each of in1..in4 at offset x and
// writes them with v_store_interleave to out[4 * x], i.e. four output bytes
// per input position.
//
// in1..in4  input rows, each read at indices [0, width)
// out       output row, written at indices [0, 4 * width)
// width     number of positions in each input row
//
// Returns 0 when width is smaller than one vector (nothing is written);
// otherwise every position is processed and the returned value equals width.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
2089+
const uchar in4[], uchar out[], const int width)
2090+
{
2091+
constexpr int nlanes = v_uint8::nlanes;
2092+
// A row narrower than one vector cannot use the overlapping-tail trick
// below, so report zero processed positions and leave it to the caller.
if (width < nlanes)
2093+
return 0;
2094+
2095+
int x = 0;
2096+
// The outer loop runs at most twice: once for the full-vector sweep and,
// if a partial tail remains, once more for a single overlapping vector.
for (;;)
2097+
{
2098+
for (; x <= width - nlanes; x += nlanes)
2099+
{
2100+
v_uint8 a, b, c, d;
2101+
a = vx_load(&in1[x]);
2102+
b = vx_load(&in2[x]);
2103+
c = vx_load(&in3[x]);
2104+
d = vx_load(&in4[x]);
2105+
v_store_interleave(&out[4 * x], a, b, c, d);
2106+
}
2107+
// Fewer than nlanes positions are left: step back so the last vector
// ends exactly at width. Positions already handled are loaded and
// stored a second time.
// NOTE(review): rewriting the overlap is only harmless if `out` does not
// alias the inputs - confirm with the caller.
if (x < width)
2108+
{
2109+
x = width - nlanes;
2110+
continue;
2111+
}
2112+
break;
2113+
}
2114+
return x;
2115+
}
2116+
20792117
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
20802118

20812119
CV_CPU_OPTIMIZATION_NAMESPACE_END

0 commit comments

Comments
 (0)
0