Merge pull request #294 from howjmay/eor · plctlab/numpy@c6bb7f4 · GitHub
[go: up one dir, main page]

Skip to content

Commit c6bb7f4

Browse files
authored
Merge pull request numpy#294 from howjmay/eor
feat: Add veor[q]_[s8|s16|s32|u8|u16|u32|s64|u64]
2 parents 7b5aa3a + 5f82a0e commit c6bb7f4

File tree

2 files changed

+290
-32
lines changed

2 files changed

+290
-32
lines changed

neon2rvv.h

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7475,37 +7475,37 @@ FORCE_INLINE uint32x4_t vorrq_u32(uint32x4_t __a, uint32x4_t __b) { return __ris
74757475

74767476
// Bitwise OR of two uint64x2_t vectors: forwards __a and __b to
// __riscv_vor_vv_u64m1; the trailing 2 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) { return __riscv_vor_vv_u64m1(__a, __b, 2); }
74777477

7478-
// FORCE_INLINE int8x8_t veor_s8(int8x8_t __a, int8x8_t __b);
7478+
// Bitwise exclusive OR of two int8x8_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i8m1; the trailing 8 is the element count (vl) given to the intrinsic.
FORCE_INLINE int8x8_t veor_s8(int8x8_t __a, int8x8_t __b) { return __riscv_vxor_vv_i8m1(__a, __b, 8); }
74797479

7480-
// FORCE_INLINE int16x4_t veor_s16(int16x4_t __a, int16x4_t __b);
7480+
// Bitwise exclusive OR of two int16x4_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i16m1; the trailing 4 is the element count (vl) given to the intrinsic.
FORCE_INLINE int16x4_t veor_s16(int16x4_t __a, int16x4_t __b) { return __riscv_vxor_vv_i16m1(__a, __b, 4); }
74817481

7482-
// FORCE_INLINE int32x2_t veor_s32(int32x2_t __a, int32x2_t __b);
7482+
// Bitwise exclusive OR of two int32x2_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i32m1; the trailing 2 is the element count (vl) given to the intrinsic.
FORCE_INLINE int32x2_t veor_s32(int32x2_t __a, int32x2_t __b) { return __riscv_vxor_vv_i32m1(__a, __b, 2); }
74837483

7484-
// FORCE_INLINE uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b);
7484+
// Bitwise exclusive OR of two uint8x8_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u8m1; the trailing 8 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b) { return __riscv_vxor_vv_u8m1(__a, __b, 8); }
74857485

7486-
// FORCE_INLINE uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b);
7486+
// Bitwise exclusive OR of two uint16x4_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u16m1; the trailing 4 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b) { return __riscv_vxor_vv_u16m1(__a, __b, 4); }
74877487

7488-
// FORCE_INLINE uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b);
7488+
// Bitwise exclusive OR of two uint32x2_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u32m1; the trailing 2 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b) { return __riscv_vxor_vv_u32m1(__a, __b, 2); }
74897489

7490-
// FORCE_INLINE int64x1_t veor_s64(int64x1_t __a, int64x1_t __b);
7490+
// Bitwise exclusive OR of two int64x1_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i64m1; the trailing 1 is the element count (vl) given to the intrinsic.
FORCE_INLINE int64x1_t veor_s64(int64x1_t __a, int64x1_t __b) { return __riscv_vxor_vv_i64m1(__a, __b, 1); }
74917491

7492-
// FORCE_INLINE uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b);
7492+
// Bitwise exclusive OR of two uint64x1_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u64m1; the trailing 1 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b) { return __riscv_vxor_vv_u64m1(__a, __b, 1); }
74937493

7494-
// FORCE_INLINE int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b);
7494+
// Bitwise exclusive OR of two int8x16_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i8m1; the trailing 16 is the element count (vl) given to the intrinsic.
FORCE_INLINE int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b) { return __riscv_vxor_vv_i8m1(__a, __b, 16); }
74957495

7496-
// FORCE_INLINE int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b);
7496+
// Bitwise exclusive OR of two int16x8_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i16m1; the trailing 8 is the element count (vl) given to the intrinsic.
FORCE_INLINE int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b) { return __riscv_vxor_vv_i16m1(__a, __b, 8); }
74977497

7498-
// FORCE_INLINE int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b);
7498+
// Bitwise exclusive OR of two int32x4_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i32m1; the trailing 4 is the element count (vl) given to the intrinsic.
FORCE_INLINE int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b) { return __riscv_vxor_vv_i32m1(__a, __b, 4); }
74997499

7500-
// FORCE_INLINE int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b);
7500+
// Bitwise exclusive OR of two int64x2_t vectors: forwards __a and __b to
// __riscv_vxor_vv_i64m1; the trailing 2 is the element count (vl) given to the intrinsic.
FORCE_INLINE int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b) { return __riscv_vxor_vv_i64m1(__a, __b, 2); }
75017501

7502-
// FORCE_INLINE uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b);
7502+
// Bitwise exclusive OR of two uint8x16_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u8m1; the trailing 16 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b) { return __riscv_vxor_vv_u8m1(__a, __b, 16); }
75037503

7504-
// FORCE_INLINE uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b);
7504+
// Bitwise exclusive OR of two uint16x8_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u16m1; the trailing 8 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b) { return __riscv_vxor_vv_u16m1(__a, __b, 8); }
75057505

7506-
// FORCE_INLINE uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b);
7506+
// Bitwise exclusive OR of two uint32x4_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u32m1; the trailing 4 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) { return __riscv_vxor_vv_u32m1(__a, __b, 4); }
75077507

7508-
// FORCE_INLINE uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b);
7508+
// Bitwise exclusive OR of two uint64x2_t vectors: forwards __a and __b to
// __riscv_vxor_vv_u64m1; the trailing 2 is the element count (vl) given to the intrinsic.
FORCE_INLINE uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) { return __riscv_vxor_vv_u64m1(__a, __b, 2); }
75097509

75107510
FORCE_INLINE int8x8_t vbic_s8(int8x8_t __a, int8x8_t __b) {
75117511
return __riscv_vand_vv_i8m1(__a, __riscv_vnot_v_i8m1(__b, 8), 8);

tests/impl.cpp

Lines changed: 274 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30511,37 +30511,295 @@ result_t test_vorrq_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
3051130511
#endif // ENABLE_TEST_ALL
3051230512
}
3051330513

30514-
result_t test_veor_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30514+
result_t test_veor_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30515+
#ifdef ENABLE_TEST_ALL
30516+
const int8_t *_a = (int8_t *)impl.test_cases_int_pointer1;
30517+
const int8_t *_b = (int8_t *)impl.test_cases_int_pointer2;
30518+
int8_t _d[8];
30519+
for (int i = 0; i < 8; i++) {
30520+
_d[i] = _a[i] ^ _b[i];
30521+
}
30522+
30523+
int8x8_t a = vld1_s8(_a);
30524+
int8x8_t b = vld1_s8(_b);
30525+
int8x8_t c = veor_s8(a, b);
30526+
return validate_int8(c, _d[0], _d[1], _d[2], _d[3], _d[4], _d[5], _d[6], _d[7]);
30527+
#else
30528+
return TEST_UNIMPL;
30529+
#endif // ENABLE_TEST_ALL
30530+
}
30531+
30532+
result_t test_veor_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30533+
#ifdef ENABLE_TEST_ALL
30534+
const int16_t *_a = (int16_t *)impl.test_cases_int_pointer1;
30535+
const int16_t *_b = (int16_t *)impl.test_cases_int_pointer2;
30536+
int16_t _d[4];
30537+
for (int i = 0; i < 4; i++) {
30538+
_d[i] = _a[i] ^ _b[i];
30539+
}
30540+
30541+
int16x4_t a = vld1_s16(_a);
30542+
int16x4_t b = vld1_s16(_b);
30543+
int16x4_t c = veor_s16(a, b);
30544+
return validate_int16(c, _d[0], _d[1], _d[2], _d[3]);
30545+
#else
30546+
return TEST_UNIMPL;
30547+
#endif // ENABLE_TEST_ALL
30548+
}
30549+
30550+
result_t test_veor_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30551+
#ifdef ENABLE_TEST_ALL
30552+
const int32_t *_a = (int32_t *)impl.test_cases_int_pointer1;
30553+
const int32_t *_b = (int32_t *)impl.test_cases_int_pointer2;
30554+
int32_t _d[2];
30555+
for (int i = 0; i < 2; i++) {
30556+
_d[i] = _a[i] ^ _b[i];
30557+
}
30558+
30559+
int32x2_t a = vld1_s32(_a);
30560+
int32x2_t b = vld1_s32(_b);
30561+
int32x2_t c = veor_s32(a, b);
30562+
return validate_int32(c, _d[0], _d[1]);
30563+
#else
30564+
return TEST_UNIMPL;
30565+
#endif // ENABLE_TEST_ALL
30566+
}
30567+
30568+
result_t test_veor_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30569+
#ifdef ENABLE_TEST_ALL
30570+
const uint8_t *_a = (uint8_t *)impl.test_cases_int_pointer1;
30571+
const uint8_t *_b = (uint8_t *)impl.test_cases_int_pointer2;
30572+
uint8_t _d[8];
30573+
for (int i = 0; i < 8; i++) {
30574+
_d[i] = _a[i] ^ _b[i];
30575+
}
30576+
30577+
uint8x8_t a = vld1_u8(_a);
30578+
uint8x8_t b = vld1_u8(_b);
30579+
uint8x8_t c = veor_u8(a, b);
30580+
return validate_uint8(c, _d[0], _d[1], _d[2], _d[3], _d[4], _d[5], _d[6], _d[7]);
30581+
#else
30582+
return TEST_UNIMPL;
30583+
#endif // ENABLE_TEST_ALL
30584+
}
30585+
30586+
result_t test_veor_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30587+
#ifdef ENABLE_TEST_ALL
30588+
const uint16_t *_a = (uint16_t *)impl.test_cases_int_pointer1;
30589+
const uint16_t *_b = (uint16_t *)impl.test_cases_int_pointer2;
30590+
uint16_t _d[4];
30591+
for (int i = 0; i < 4; i++) {
30592+
_d[i] = _a[i] ^ _b[i];
30593+
}
30594+
30595+
uint16x4_t a = vld1_u16(_a);
30596+
uint16x4_t b = vld1_u16(_b);
30597+
uint16x4_t c = veor_u16(a, b);
30598+
return validate_uint16(c, _d[0], _d[1], _d[2], _d[3]);
30599+
#else
30600+
return TEST_UNIMPL;
30601+
#endif // ENABLE_TEST_ALL
30602+
}
30603+
30604+
result_t test_veor_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30605+
#ifdef ENABLE_TEST_ALL
30606+
const uint32_t *_a = (uint32_t *)impl.test_cases_int_pointer1;
30607+
const uint32_t *_b = (uint32_t *)impl.test_cases_int_pointer2;
30608+
uint32_t _d[2];
30609+
for (int i = 0; i < 2; i++) {
30610+
_d[i] = _a[i] ^ _b[i];
30611+
}
30612+
30613+
uint32x2_t a = vld1_u32(_a);
30614+
uint32x2_t b = vld1_u32(_b);
30615+
uint32x2_t c = veor_u32(a, b);
30616+
return validate_uint32(c, _d[0], _d[1]);
30617+
#else
30618+
return TEST_UNIMPL;
30619+
#endif // ENABLE_TEST_ALL
30620+
}
30621+
30622+
result_t test_veor_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30623+
#ifdef ENABLE_TEST_ALL
30624+
const int64_t *_a = (int64_t *)impl.test_cases_int_pointer1;
30625+
const int64_t *_b = (int64_t *)impl.test_cases_int_pointer2;
30626+
int64_t _d[1];
30627+
for (int i = 0; i < 1; i++) {
30628+
_d[i] = _a[i] ^ _b[i];
30629+
}
30630+
30631+
int64x1_t a = vld1_s64(_a);
30632+
int64x1_t b = vld1_s64(_b);
30633+
int64x1_t c = veor_s64(a, b);
30634+
return validate_int64(c, _d[0]);
30635+
#else
30636+
return TEST_UNIMPL;
30637+
#endif // ENABLE_TEST_ALL
30638+
}
30639+
30640+
result_t test_veor_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30641+
#ifdef ENABLE_TEST_ALL
30642+
const uint64_t *_a = (uint64_t *)impl.test_cases_int_pointer1;
30643+
const uint64_t *_b = (uint64_t *)impl.test_cases_int_pointer2;
30644+
uint64_t _d[1];
30645+
for (int i = 0; i < 1; i++) {
30646+
_d[i] = _a[i] ^ _b[i];
30647+
}
30648+
30649+
uint64x1_t a = vld1_u64(_a);
30650+
uint64x1_t b = vld1_u64(_b);
30651+
uint64x1_t c = veor_u64(a, b);
30652+
return validate_uint64(c, _d[0]);
30653+
#else
30654+
return TEST_UNIMPL;
30655+
#endif // ENABLE_TEST_ALL
30656+
}
3051530657

30516-
result_t test_veor_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30658+
result_t test_veorq_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30659+
#ifdef ENABLE_TEST_ALL
30660+
const int8_t *_a = (int8_t *)impl.test_cases_int_pointer1;
30661+
const int8_t *_b = (int8_t *)impl.test_cases_int_pointer2;
30662+
int8_t _d[16];
30663+
for (int i = 0; i < 16; i++) {
30664+
_d[i] = _a[i] ^ _b[i];
30665+
}
3051730666

30518-
result_t test_veor_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30667+
int8x16_t a = vld1q_s8(_a);
30668+
int8x16_t b = vld1q_s8(_b);
30669+
int8x16_t c = veorq_s8(a, b);
30670+
return validate_int8(c, _d[0], _d[1], _d[2], _d[3], _d[4], _d[5], _d[6], _d[7], _d[8], _d[9], _d[10], _d[11], _d[12],
30671+
_d[13], _d[14], _d[15]);
30672+
#else
30673+
return TEST_UNIMPL;
30674+
#endif // ENABLE_TEST_ALL
30675+
}
3051930676

30520-
result_t test_veor_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30677+
result_t test_veorq_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30678+
#ifdef ENABLE_TEST_ALL
30679+
const int16_t *_a = (int16_t *)impl.test_cases_int_pointer1;
30680+
const int16_t *_b = (int16_t *)impl.test_cases_int_pointer2;
30681+
int16_t _d[8];
30682+
for (int i = 0; i < 8; i++) {
30683+
_d[i] = _a[i] ^ _b[i];
30684+
}
3052130685

30522-
result_t test_veor_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30686+
int16x8_t a = vld1q_s16(_a);
30687+
int16x8_t b = vld1q_s16(_b);
30688+
int16x8_t c = veorq_s16(a, b);
30689+
return validate_int16(c, _d[0], _d[1], _d[2], _d[3], _d[4], _d[5], _d[6], _d[7]);
30690+
#else
30691+
return TEST_UNIMPL;
30692+
#endif // ENABLE_TEST_ALL
30693+
}
3052330694

30524-
result_t test_veor_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30695+
result_t test_veorq_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30696+
#ifdef ENABLE_TEST_ALL
30697+
const int32_t *_a = (int32_t *)impl.test_cases_int_pointer1;
30698+
const int32_t *_b = (int32_t *)impl.test_cases_int_pointer2;
30699+
int32_t _d[4];
30700+
for (int i = 0; i < 4; i++) {
30701+
_d[i] = _a[i] ^ _b[i];
30702+
}
3052530703

30526-
result_t test_veor_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30704+
int32x4_t a = vld1q_s32(_a);
30705+
int32x4_t b = vld1q_s32(_b);
30706+
int32x4_t c = veorq_s32(a, b);
30707+
return validate_int32(c, _d[0], _d[1], _d[2], _d[3]);
30708+
#else
30709+
return TEST_UNIMPL;
30710+
#endif // ENABLE_TEST_ALL
30711+
}
3052730712

30528-
result_t test_veor_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30713+
result_t test_veorq_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30714+
#ifdef ENABLE_TEST_ALL
30715+
const int64_t *_a = (int64_t *)impl.test_cases_int_pointer1;
30716+
const int64_t *_b = (int64_t *)impl.test_cases_int_pointer2;
30717+
int64_t _d[2];
30718+
for (int i = 0; i < 2; i++) {
30719+
_d[i] = _a[i] ^ _b[i];
30720+
}
3052930721

30530-
result_t test_veorq_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30722+
int64x2_t a = vld1q_s64(_a);
30723+
int64x2_t b = vld1q_s64(_b);
30724+
int64x2_t c = veorq_s64(a, b);
30725+
return validate_int64(c, _d[0], _d[1]);
30726+
#else
30727+
return TEST_UNIMPL;
30728+
#endif // ENABLE_TEST_ALL
30729+
}
30730+
30731+
result_t test_veorq_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30732+
#ifdef ENABLE_TEST_ALL
30733+
const uint8_t *_a = (uint8_t *)impl.test_cases_int_pointer1;
30734+
const uint8_t *_b = (uint8_t *)impl.test_cases_int_pointer2;
30735+
uint8_t _d[16];
30736+
for (int i = 0; i < 16; i++) {
30737+
_d[i] = _a[i] ^ _b[i];
30738+
}
3053130739

30532-
result_t test_veorq_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30740+
uint8x16_t a = vld1q_u8(_a);
30741+
uint8x16_t b = vld1q_u8(_b);
30742+
uint8x16_t c = veorq_u8(a, b);
30743+
return validate_uint8(c, _d[0], _d[1], _d[2], _d[3], _d[4], _d[5], _d[6], _d[7], _d[8], _d[9], _d[10], _d[11], _d[12],
30744+
_d[13], _d[14], _d[15]);
30745+
#else
30746+
return TEST_UNIMPL;
30747+
#endif // ENABLE_TEST_ALL
30748+
}
3053330749

30534-
result_t test_veorq_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30750+
result_t test_veorq_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30751+
#ifdef ENABLE_TEST_ALL
30752+
const uint16_t *_a = (uint16_t *)impl.test_cases_int_pointer1;
30753+
const uint16_t *_b = (uint16_t *)impl.test_cases_int_pointer2;
30754+
uint16_t _d[8];
30755+
for (int i = 0; i < 8; i++) {
30756+
_d[i] = _a[i] ^ _b[i];
30757+
}
3053530758

30536-
result_t test_veorq_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30759+
uint16x8_t a = vld1q_u16(_a);
30760+
uint16x8_t b = vld1q_u16(_b);
30761+
uint16x8_t c = veorq_u16(a, b);
30762+
return validate_uint16(c, _d[0], _d[1], _d[2], _d[3], _d[4], _d[5], _d[6], _d[7]);
30763+
#else
30764+
return TEST_UNIMPL;
30765+
#endif // ENABLE_TEST_ALL
30766+
}
3053730767

30538-
result_t test_veorq_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30768+
result_t test_veorq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30769+
#ifdef ENABLE_TEST_ALL
30770+
const uint32_t *_a = (uint32_t *)impl.test_cases_int_pointer1;
30771+
const uint32_t *_b = (uint32_t *)impl.test_cases_int_pointer2;
30772+
uint32_t _d[4];
30773+
for (int i = 0; i < 4; i++) {
30774+
_d[i] = _a[i] ^ _b[i];
30775+
}
3053930776

30540-
result_t test_veorq_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30777+
uint32x4_t a = vld1q_u32(_a);
30778+
uint32x4_t b = vld1q_u32(_b);
30779+
uint32x4_t c = veorq_u32(a, b);
30780+
return validate_uint32(c, _d[0], _d[1], _d[2], _d[3]);
30781+
#else
30782+
return TEST_UNIMPL;
30783+
#endif // ENABLE_TEST_ALL
30784+
}
3054130785

30542-
result_t test_veorq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30786+
result_t test_veorq_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
30787+
#ifdef ENABLE_TEST_ALL
30788+
const uint64_t *_a = (uint64_t *)impl.test_cases_int_pointer1;
30789+
const uint64_t *_b = (uint64_t *)impl.test_cases_int_pointer2;
30790+
uint64_t _d[2];
30791+
for (int i = 0; i < 2; i++) {
30792+
_d[i] = _a[i] ^ _b[i];
30793+
}
3054330794

30544-
result_t test_veorq_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
30795+
uint64x2_t a = vld1q_u64(_a);
30796+
uint64x2_t b = vld1q_u64(_b);
30797+
uint64x2_t c = veorq_u64(a, b);
30798+
return validate_uint64(c, _d[0], _d[1]);
30799+
#else
30800+
return TEST_UNIMPL;
30801+
#endif // ENABLE_TEST_ALL
30802+
}
3054530803

3054630804
result_t test_vbic_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
3054730805
#ifdef ENABLE_TEST_ALL

0 commit comments

Comments
 (0)
0