8000 Merge pull request #392 from howjmay/vminv · plctlab/numpy@2900f11 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2900f11

Browse files
authored
Merge pull request numpy#392 from howjmay/vminv
feat: Add vminv[q]_[s8|s16|s32|u8|u16|u32|f32|f64]
2 parents a1055f9 + 7efa420 commit 2900f11

File tree

3 files changed

+315
-45
lines changed

3 files changed

+315
-45
lines changed

neon2rvv.h

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4468,35 +4468,65 @@ FORCE_INLINE float64_t vmaxvq_f64(float64x2_t a) {
44684468
return __riscv_vfmv_f_s_f64m1_f64(__riscv_vfredmax_vs_f64m1_f64m1(a, __riscv_vfmv_v_f_f64m1(-DBL_MAX, 2), 2));
44694469
}
44704470

4471-
// FORCE_INLINE int8_t vminv_s8(int8x8_t a);
4471+
FORCE_INLINE int8_t vminv_s8(int8x8_t a) {
4472+
return __riscv_vmv_x_s_i8m1_i8(__riscv_vredmin_vs_i8m1_i8m1(a, vdup_n_s8(INT8_MAX), 8));
4473+
}
44724474

4473-
// FORCE_INLINE int8_t vminvq_s8(int8x16_t a);
4475+
FORCE_INLINE int8_t vminvq_s8(int8x16_t a) {
4476+
return __riscv_vmv_x_s_i8m1_i8(__riscv_vredmin_vs_i8m1_i8m1(a, vdupq_n_s8(INT8_MAX), 16));
4477+
}
44744478

4475-
// FORCE_INLINE int16_t vminv_s16(int16x4_t a);
4479+
FORCE_INLINE int16_t vminv_s16(int16x4_t a) {
4480+
return __riscv_vmv_x_s_i16m1_i16(__riscv_vredmin_vs_i16m1_i16m1(a, vdup_n_s16(INT16_MAX), 4));
4481+
}
44764482

4477-
// FORCE_INLINE int16_t vminvq_s16(int16x8_t a);
4483+
FORCE_INLINE int16_t vminvq_s16(int16x8_t a) {
4484+
return __riscv_vmv_x_s_i16m1_i16(__riscv_vredmin_vs_i16m1_i16m1(a, vdupq_n_s16(INT16_MAX), 8));
4485+
}
44784486

4479-
// FORCE_INLINE int32_t vminv_s32(int32x2_t a);
4487+
FORCE_INLINE int32_t vminv_s32(int32x2_t a) {
4488+
return __riscv_vmv_x_s_i32m1_i32(__riscv_vredmin_vs_i32m1_i32m1(a, vdup_n_s32(INT32_MAX), 2));
4489+
}
44804490

4481-
// FORCE_INLINE int32_t vminvq_s32(int32x4_t a);
4491+
FORCE_INLINE int32_t vminvq_s32(int32x4_t a) {
4492+
return __riscv_vmv_x_s_i32m1_i32(__riscv_vredmin_vs_i32m1_i32m1(a, vdupq_n_s32(INT32_MAX), 4));
4493+
}
44824494

4483-
// FORCE_INLINE uint8_t vminv_u8(uint8x8_t a);
4495+
FORCE_INLINE uint8_t vminv_u8(uint8x8_t a) {
4496+
return __riscv_vmv_x_s_u8m1_u8(__riscv_vredminu_vs_u8m1_u8m1(a, vdup_n_u8(UINT8_MAX), 8));
4497+
}
44844498

4485-
// FORCE_INLINE uint8_t vminvq_u8(uint8x16_t a);
4499+
FORCE_INLINE uint8_t vminvq_u8(uint8x16_t a) {
4500+
return __riscv_vmv_x_s_u8m1_u8(__riscv_vredminu_vs_u8m1_u8m1(a, vdupq_n_u8(UINT8_MAX), 16));
4501+
}
44864502

4487-
// FORCE_INLINE uint16_t vminv_u16(uint16x4_t a);
4503+
FORCE_INLINE uint16_t vminv_u16(uint16x4_t a) {
4504+
return __riscv_vmv_x_s_u16m1_u16(__riscv_vredminu_vs_u16m1_u16m1(a, vdup_n_u16(UINT16_MAX), 4));
4505+
}
44884506

4489-
// FORCE_INLINE uint16_t vminvq_u16(uint16x8_t a);
4507+
FORCE_INLINE uint16_t vminvq_u16(uint16x8_t a) {
4508+
return __riscv_vmv_x_s_u16m1_u16(__riscv_vredminu_vs_u16m1_u16m1(a, vdupq_n_u16(UINT16_MAX), 8));
4509+
}
44904510

4491-
// FORCE_INLINE uint32_t vminv_u32(uint32x2_t a);
4511+
FORCE_INLINE uint32_t vminv_u32(uint32x2_t a) {
4512+
return __riscv_vmv_x_s_u32m1_u32(__riscv_vredminu_vs_u32m1_u32m1(a, vdup_n_u32(UINT32_MAX), 2));
4513+
}
44924514

4493-
// FORCE_INLINE uint32_t vminvq_u32(uint32x4_t a);
4515+
FORCE_INLINE uint32_t vminvq_u32(uint32x4_t a) {
4516+
return __riscv_vmv_x_s_u32m1_u32(__riscv_vredminu_vs_u32m1_u32m1(a, vdupq_n_u32(UINT32_MAX), 4));
4517+
}
44944518

4495-
// FORCE_INLINE float32_t vminv_f32(float32x2_t a);
4519+
FORCE_INLINE float32_t vminv_f32(float32x2_t a) {
4520+
return __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredmin_vs_f32m1_f32m1(a, __riscv_vfmv_v_f_f32m1(FLT_MAX, 2), 2));
4521+
}
44964522

4497-
// FORCE_INLINE float32_t vminvq_f32(float32x4_t a);
4523+
FORCE_INLINE float32_t vminvq_f32(float32x4_t a) {
4524+
return __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredmin_vs_f32m1_f32m1(a, __riscv_vfmv_v_f_f32m1(FLT_MAX, 4), 4));
4525+
}
44984526

4499-
// FORCE_INLINE float64_t vminvq_f64(float64x2_t a);
4527+
FORCE_INLINE float64_t vminvq_f64(float64x2_t a) {
4528+
return __riscv_vfmv_f_s_f64m1_f64(__riscv_vfredmin_vs_f64m1_f64m1(a, __riscv_vfmv_v_f_f64m1(DBL_MAX, 2), 2));
4529+
}
45004530

45014531
// FORCE_INLINE float32_t vmaxnmv_f32(float32x2_t a);
45024532

tests/impl.cpp

Lines changed: 255 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15811,35 +15811,275 @@ result_t test_vmaxvq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
1581115811
#endif // ENABLE_TEST_ALL
1581215812
}
1581315813

15814-
result_t test_vminv_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15814+
result_t test_vminv_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15815+
#ifdef ENABLE_TEST_ALL
15816+
const int8_t *_a = (int8_t *)impl.test_cases_int_pointer1;
15817+
int8_t _c = INT8_MAX;
15818+
for (int i = 0; i < 8; i++) {
15819+
if (_a[i] < _c) {
15820+
_c = _a[i];
15821+
}
15822+
}
1581515823

15816-
result_t test_vminvq_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15824+
int8x8_t a = vld1_s8(_a);
15825+
int8_t c = vminv_s8(a);
15826+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15827+
#else
15828+
return TEST_UNIMPL;
15829+
#endif // ENABLE_TEST_ALL
15830+
}
1581715831

15818-
result_t test_vminv_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15832+
result_t test_vminvq_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15833+
#ifdef ENABLE_TEST_ALL
15834+
const int8_t *_a = (int8_t *)impl.test_cases_int_pointer1;
15835+
int8_t _c = INT8_MAX;
15836+
for (int i = 0; i < 16; i++) {
15837+
if (_a[i] < _c) {
15838+
_c = _a[i];
15839+
}
15840+
}
1581915841

15820-
result_t test_vminvq_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15842+
int8x16_t a = vld1q_s8(_a);
15843+
int8_t c = vminvq_s8(a);
15844+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15845+
#else
15846+
return TEST_UNIMPL;
15847+
#endif // ENABLE_TEST_ALL
15848+
}
1582115849

15822-
result_t test_vminv_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15850+
result_t test_vminv_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15851+
#ifdef ENABLE_TEST_ALL
15852+
const int16_t *_a = (int16_t *)impl.test_cases_int_pointer1;
15853+
int16_t _c = INT16_MAX;
15854+
for (int i = 0; i < 4; i++) {
15855+
if (_a[i] < _c) {
15856+
_c = _a[i];
15857+
}
15858+
}
15859+
15860+
int16x4_t a = vld1_s16(_a);
15861+
int16_t c = vminv_s16(a);
15862+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15863+
#else
15864+
return TEST_UNIMPL;
15865+
#endif // ENABLE_TEST_ALL
15866+
}
1582315867

15824-
result_t test_vminvq_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15868+
result_t test_vminvq_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15869+
#ifdef ENABLE_TEST_ALL
15870+
const int16_t *_a = (int16_t *)impl.test_cases_int_pointer1;
15871+
int16_t _c = INT16_MAX;
15872+
for (int i = 0; i < 8; i++) {
15873+
if (_a[i] < _c) {
15874+
_c = _a[i];
15875+
}
15876+
}
15877+
15878+
int16x8_t a = vld1q_s16(_a);
15879+
int16_t c = vminvq_s16(a);
15880+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15881+
#else
15882+
return TEST_UNIMPL;
15883+
#endif // ENABLE_TEST_ALL
15884+
}
1582515885

15826-
result_t test_vminv_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15886+
result_t test_vminv_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15887+
#ifdef ENABLE_TEST_ALL
15888+
const int32_t *_a = (int32_t *)impl.test_cases_int_pointer1;
15889+
int32_t _c = INT32_MAX;
15890+
for (int i = 0; i < 2; i++) {
15891+
if (_a[i] < _c) {
15892+
_c = _a[i];
15893+
}
15894+
}
1582715895

15828-
result_t test_vminvq_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15896+
int32x2_t a = vld1_s32(_a);
15897+
int32_t c = vminv_s32(a);
15898+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15899+
#else
15900+
return TEST_UNIMPL;
15901+
#endif // ENABLE_TEST_ALL
15902+
}
1582915903

15830-
result_t test_vminv_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15904+
result_t test_vminvq_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15905+
#ifdef ENABLE_TEST_ALL
15906+
const int32_t *_a = (int32_t *)impl.test_cases_int_pointer1;
15907+
int32_t _c = INT32_MAX;
15908+
for (int i = 0; i < 4; i++) {
15909+
if (_a[i] < _c) {
15910+
_c = _a[i];
15911+
}
15912+
}
1583115913

15832-
result_t test_vminvq_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15914+
int32x4_t a = vld1q_s32(_a);
15915+
int32_t c = vminvq_s32(a);
15916+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15917+
#else
15918+
return TEST_UNIMPL;
15919+
#endif // ENABLE_TEST_ALL
15920+
}
1583315921

15834-
result_t test_vminv_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15922+
result_t test_vminv_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15923+
#ifdef ENABLE_TEST_ALL
15924+
const uint8_t *_a = (uint8_t *)impl.test_cases_int_pointer1;
15925+
uint8_t _c = UINT8_MAX;
15926+
for (int i = 0; i < 8; i++) {
15927+
if (_a[i] < _c) {
15928+
_c = _a[i];
15929+
}
15930+
}
1583515931

15836-
result_t test_vminvq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15932+
uint8x8_t a = vld1_u8(_a);
15933+
uint8_t c = vminv_u8(a);
15934+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15935+
#else
15936+
return TEST_UNIMPL;
15937+
#endif // ENABLE_TEST_ALL
15938+
}
1583715939

15838-
result_t test_vminv_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15940+
result_t test_vminvq_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15941+
#ifdef ENABLE_TEST_ALL
15942+
const uint8_t *_a = (uint8_t *)impl.test_cases_int_pointer1;
15943+
uint8_t _c = UINT8_MAX;
15944+
for (int i = 0; i < 16; i++) {
15945+
if (_a[i] < _c) {
15946+
_c = _a[i];
15947+
}
15948+
}
1583915949

15840-
result_t test_vminvq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15950+
uint8x16_t a = vld1q_u8(_a);
15951+
uint8_t c = vminvq_u8(a);
15952+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15953+
#else
15954+
return TEST_UNIMPL;
15955+
#endif // ENABLE_TEST_ALL
15956+
}
1584115957

15842-
result_t test_vminvq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
15958+
result_t test_vminv_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15959+
#ifdef ENABLE_TEST_ALL
15960+
const uint16_t *_a = (uint16_t *)impl.test_cases_int_pointer1;
15961+
uint16_t _c = UINT16_MAX;
15962+
for (int i = 0; i < 4; i++) {
15963+
if (_a[i] < _c) {
15964+
_c = _a[i];
15965+
}
15966+
}
15967+
15968+
uint16x4_t a = vld1_u16(_a);
15969+
uint16_t c = vminv_u16(a);
15970+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15971+
#else
15972+
return TEST_UNIMPL;
15973+
#endif // ENABLE_TEST_ALL
15974+
}
15975+
15976+
result_t test_vminvq_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15977+
#ifdef ENABLE_TEST_ALL
15978+
const uint16_t *_a = (uint16_t *)impl.test_cases_int_pointer1;
15979+
uint16_t _c = UINT16_MAX;
15980+
for (int i = 0; i < 8; i++) {
15981+
if (_a[i] < _c) {
15982+
_c = _a[i];
15983+
}
15984+
}
15985+
15986+
uint16x8_t a = vld1q_u16(_a);
15987+
uint16_t c = vminvq_u16(a);
15988+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
15989+
#else
15990+
return TEST_UNIMPL;
15991+
#endif // ENABLE_TEST_ALL
15992+
}
15993+
15994+
result_t test_vminv_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
15995+
#ifdef ENABLE_TEST_ALL
15996+
const uint32_t *_a = (uint32_t *)impl.test_cases_int_pointer1;
15997+
uint32_t _c = UINT32_MAX;
15998+
for (int i = 0; i < 2; i++) {
15999+
if (_a[i] < _c) {
16000+
_c = _a[i];
16001+
}
16002+
}
16003+
16004+
uint32x2_t a = vld1_u32(_a);
16005+
uint32_t c = vminv_u32(a);
16006+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
16007+
#else
16008+
return TEST_UNIMPL;
16009+
#endif // ENABLE_TEST_ALL
16010+
}
16011+
16012+
result_t test_vminvq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
16013+
#ifdef ENABLE_TEST_ALL
16014+
const uint32_t *_a = (uint32_t *)impl.test_cases_int_pointer1;
16015+
uint32_t _c = UINT32_MAX;
16016+
for (int i = 0; i < 4; i++) {
16017+
if (_a[i] < _c) {
16018+
_c = _a[i];
16019+
}
16020+
}
16021+
16022+
uint32x4_t a = vld1q_u32(_a);
16023+
uint32_t c = vminvq_u32(a);
16024+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
16025+
#else
16026+
return TEST_UNIMPL;
16027+
#endif // ENABLE_TEST_ALL
16028+
}
16029+
16030+
result_t test_vminv_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
16031+
#ifdef ENABLE_TEST_ALL
16032+
const float *_a = (float *)impl.test_cases_float_pointer1;
16033+
float _c = FLT_MAX;
16034+
for (int i = 0; i < 2; i++) {
16035+
if (_a[i] < _c) {
16036+
_c = _a[i];
16037+
}
16038+
}
16039+
16040+
float32x2_t a = vld1_f32(_a);
16041+
float32_t c = vminv_f32(a);
16042+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
16043+
#else
16044+
return TEST_UNIMPL;
16045+
#endif // ENABLE_TEST_ALL
16046+
}
16047+
16048+
result_t test_vminvq_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
16049+
#ifdef ENABLE_TEST_ALL
16050+
const float *_a = (float *)impl.test_cases_float_pointer1;
16051+
float _c = FLT_MAX;
16052+
for (int i = 0; i < 4; i++) {
16053+
if (_a[i] < _c) {
16054+
_c = _a[i];
16055+
}
16056+
}
16057+
16058+
float32x4_t a = vld1q_f32(_a);
16059+
float32_t c = vminvq_f32(a);
16060+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
16061+
#else
16062+
return TEST_UNIMPL;
16063+
#endif // ENABLE_TEST_ALL
16064+
}
16065+
16066+
result_t test_vminvq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
16067+
#ifdef ENABLE_TEST_ALL
16068+
const double *_a = (double *)impl.test_cases_float_pointer1;
16069+
double _c = DBL_MAX;
16070+
for (int i = 0; i < 2; i++) {
16071+
if (_a[i] < _c) {
16072+
_c = _a[i];
16073+
}
16074+
}
16075+
16076+
float64x2_t a = vld1q_f64(_a);
16077+
float64_t c = vminvq_f64(a);
16078+
return c == _c ? TEST_SUCCESS : TEST_FAIL;
16079+
#else
16080+
return TEST_UNIMPL;
16081+
#endif // ENABLE_TEST_ALL
16082+
}
1584316083

1584416084
result_t test_vmaxnmv_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
1584516085

0 commit comments

Comments
 (0)
0