Add tail proc for split3 and split4 · opencv/opencv@667fc9b · GitHub
[go: up one dir, main page]

Skip to content

Commit 667fc9b

Browse files
author
Aleksei Trutnev
committed
Add tail proc for split3 and split4
1 parent 946054c commit 667fc9b

File tree

1 file changed

+37
-13
lines changed

1 file changed

+37
-13
lines changed

modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp

Lines changed: 37 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1584,14 +1584,26 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[],
15841584
uchar out3[], const int width)
15851585
{
15861586
constexpr int nlanes = v_uint8::nlanes;
1587+
if (width < nlanes)
1588+
return 0;
1589+
15871590
int x = 0;
1588-
for (; x <= width - nlanes; x += nlanes)
1591+
for (;;)
15891592
{
1590-
v_uint8 a, b, c;
1591-
v_load_deinterleave(&in[3 * x], a, b, c);
1592-
vx_store(&out1[x], a);
1593-
vx_store(&out2[x], b);
1594-
vx_store(&out3[x], c);
1593+
for (; x <= width - nlanes; x += nlanes)
1594+
{
1595+
v_uint8 a, b, c;
1596+
v_load_deinterleave(&in[3 * x], a, b, c);
1597+
vx_store(&out1[x], a);
1598+
vx_store(&out2[x], b);
1599+
vx_store(&out3[x], c);
1600+
}
1601+
if (x < width)
1602+
{
1603+
x = width - nlanes;
1604+
continue;
1605+
}
1606+
break;
15951607
}
15961608
return x;
15971609
}
@@ -1606,15 +1618,27 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
16061618
uchar out3[], uchar out4[], const int width)
16071619
{
16081620
constexpr int nlanes = v_uint8::nlanes;
1621+
if (width < nlanes)
1622+
return 0;
1623+
16091624
int x = 0;
1610-
for (; x <= width - nlanes; x += nlanes)
1625+
for (;;)
16111626
{
1612-
v_uint8 a, b, c, d;
1613-
v_load_deinterleave(&in[4 * x], a, b, c, d);
1614-
vx_store(&out1[x], a);
1615-
vx_store(&out2[x], b);
1616-
vx_store(&out3[x], c);
1617-
vx_store(&out4[x], d);
1627+
for (; x <= width - nlanes; x += nlanes)
1628+
{
1629+
v_uint8 a, b, c, d;
1630+
v_load_deinterleave(&in[4 * x], a, b, c, d);
1631+
vx_store(&out1[x], a);
1632+
vx_store(&out2[x], b);
1633+
vx_store(&out3[x], c);
1634+
vx_store(&out4[x], d);
1635+
}
1636+
if (x < width)
1637+
{
1638+
x = width - nlanes;
1639+
continue;
1640+
}
1641+
break;
16181642
}
16191643
return x;
16201644
}

0 commit comments

Comments
 (0)
0