Merge pull request #21441 from alexgiving:atrutnev/split3_simd_fluid · opencv/opencv@9238316 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9238316

Browse files
committed
Merge pull request #21441 from alexgiving:atrutnev/split3_simd_fluid
2 parents 266835c + 5e89b9a commit 9238316

File tree

4 files changed

+43
-16
lines changed

4 files changed

+43
-16
lines changed

modules/gapi/src/backends/fluid/gfluidcore.cpp

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2509,26 +2509,18 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)
25092509

25102510
static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)
25112511
{
2512-
const auto *in = src.InLine<uchar>(0);
2513-
auto *out1 = dst1.OutLine<uchar>();
2514-
auto *out2 = dst2.OutLine<uchar>();
2515-
auto *out3 = dst3.OutLine<uchar>();
2512+
const auto *in = src.InLine<uchar>(0);
2513+
auto *out1 = dst1.OutLine<uchar>();
2514+
auto *out2 = dst2.OutLine<uchar>();
2515+
auto *out3 = dst3.OutLine<uchar>();
25162516

25172517
GAPI_Assert(3 == src.meta().chan);
25182518
int width = src.length();
2519+
int w = 0;
25192520

2520-
int w = 0; // cycle counter
2521-
2522-
#if CV_SIMD128
2523-
for (; w <= width-16; w+=16)
2524-
{
2525-
v_uint8x16 a, b, c;
2526-
v_load_deinterleave(&in[3*w], a, b, c);
2527-
v_store(&out1[w], a);
2528-
v_store(&out2[w], b);
2529-
v_store(&out3[w], c);
2530-
}
2531-
#endif
2521+
#if CV_SIMD
2522+
w = split3_simd(in, out1, out2, out3, width);
2523+
#endif
25322524

25332525
for (; w < width; w++)
25342526
{

modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,13 @@ ABSDIFFC_SIMD(float)
207207

208208
#undef ABSDIFFC_SIMD
209209

210+
// Entry point for the Split3 SIMD helper in the fluid namespace.
// Forwards all arguments unchanged to split3_simd through CV_CPU_DISPATCH
// with CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): presumably the macro selects a CPU-specific build of the
// implementation at run time and returns its result; confirm against the
// OpenCV dispatch macros.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
211+
uchar out3[], const int width)
212+
{
213+
CV_CPU_DISPATCH(split3_simd, (in, out1, out2, out3, width),
214+
CV_CPU_DISPATCH_MODES_ALL);
215+
}
216+
210217
} // namespace fluid
211218
} // namespace gapi
212219
} // namespace cv

modules/gapi/src/backends/fluid/gfluidcore_func.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ ABSDIFFC_SIMD(float)
163163

164164
#undef ABSDIFFC_SIMD
165165

166+
// Split3 SIMD helper: takes one input row `in`, three output rows
// `out1`/`out2`/`out3`, and the row `width`.
// The caller in GFluidSplit3::run uses the returned int as the starting
// index of its scalar tail loop.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
167+
uchar out3[], const int width);
168+
166169
} // namespace fluid
167170
} // namespace gapi
168171
} // namespace cv

modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ ABSDIFFC_SIMD(float)
184184

185185
#undef ABSDIFFC_SIMD
186186

187+
// Forward declaration of the Split3 SIMD implementation; it is placed before
// the CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY guard, so it is visible even
// when only declarations are requested.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
188+
uchar out3[], const int width);
189+
187190
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
188191

189192
struct scale_tag {};
@@ -1568,6 +1571,28 @@ ABSDIFFC_SIMD(float)
15681571

15691572
#undef ABSDIFFC_SIMD
15701573

1574+
//-------------------------
1575+
//
1576+
// Fluid kernels: Split3
1577+
//
1578+
//-------------------------
1579+
1580+
// Vectorized part of Split3: walks the input row in steps of
// v_uint8::nlanes elements, reading from in[3 * x] and storing one vector
// into each of out1, out2 and out3 at index x.
// Only whole vectors are processed. The return value is the index of the
// first element that was NOT handled, so the caller can finish the remaining
// (width - x) elements with scalar code.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
1581+
uchar out3[], const int width)
1582+
{
1583+
constexpr int nlanes = v_uint8::nlanes; // elements consumed per output on each iteration
1584+
int x = 0;
1585+
for (; x <= width - nlanes; x += nlanes) // stops once fewer than nlanes elements remain
1586+
{
1587+
v_uint8 a, b, c;
1588+
v_load_deinterleave(&in[3 * x], a, b, c); // input index is 3 * x: three input values per output element
1589+
vx_store(&out1[x], a);
1590+
vx_store(&out2[x], b);
1591+
vx_store(&out3[x], c);
1592+
}
1593+
return x; // count of elements already written to each output row
1594+
}
1595+
15711596
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
15721597

15731598
CV_CPU_OPTIMIZATION_NAMESPACE_END

0 commit comments

Comments
 (0)
0