@@ -1360,22 +1360,6 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
1360
1360
const int8x16_t v1_1hs = vsubq_s8 (v1_1h , s8b );
1361
1361
1362
1362
// dot product into int16x8_t
1363
- #if defined(__ARM_FEATURE_DOTPROD )
1364
- int32x4_t p_0 = vdotq_s32 (vdupq_n_s32 (0 ), v0_0ls , v1_0ls );
1365
- int32x4_t p_1 = vdotq_s32 (vdupq_n_s32 (0 ), v0_1ls , v1_1ls );
1366
-
1367
- p_0 = vdotq_s32 (p_0 , v0_0hs , v1_0hs );
1368
- p_1 = vdotq_s32 (p_1 , v0_1hs , v1_1hs );
1369
-
1370
- // scalar
1371
- #if defined(__ARM_FEATURE_QRDMX )
1372
- sum0 += d0_0 * d1_0 * vaddvq_s32 (p_0 );
1373
- sum1 += d0_1 * d1_1 * vaddvq_s32 (p_1 );
1374
- #else
1375
- sum0 += d0_0 * d1_0 * (vgetq_lane_s32 (p_0 , 0 ) + vgetq_lane_s32 (p_0 , 1 ) + vgetq_lane_s32 (p_0 , 2 ) + vgetq_lane_s32 (p_0 , 3 ));
1376
- sum1 += d0_1 * d1_1 * (vgetq_lane_s32 (p_1 , 0 ) + vgetq_lane_s32 (p_1 , 1 ) + vgetq_lane_s32 (p_1 , 2 ) + vgetq_lane_s32 (p_1 , 3 ));
1377
- #endif
1378
- #else
1379
1363
const int16x8_t pl0l = vmull_s8 (vget_low_s8 (v0_0ls ), vget_low_s8 (v1_0ls ));
1380
1364
const int16x8_t pl0h = vmull_s8 (vget_high_s8 (v0_0ls ), vget_high_s8 (v1_0ls ));
1381
1365
@@ -1404,7 +1388,6 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
1404
1388
#else
1405
1389
sum0 += d0_0 * d1_0 * (vgetq_lane_s16 (p_0 , 0 ) + vgetq_lane_s16 (p_0 , 1 ) + vgetq_lane_s16 (p_0 , 2 ) + vgetq_lane_s16 (p_0 , 3 ) + vgetq_lane_s16 (p_0 , 4 ) + vgetq_lane_s16 (p_0 , 5 ) + vgetq_lane_s16 (p_0 , 6 ) + vgetq_lane_s16 (p_0 , 7 ));
1406
1390
sum1 += d0_1 * d1_1 * (vgetq_lane_s16 (p_1 , 0 ) + vgetq_lane_s16 (p_1 , 1 ) + vgetq_lane_s16 (p_1 , 2 ) + vgetq_lane_s16 (p_1 , 3 ) + vgetq_lane_s16 (p_1 , 4 ) + vgetq_lane_s16 (p_1 , 5 ) + vgetq_lane_s16 (p_1 , 6 ) + vgetq_lane_s16 (p_1 , 7 ));
1407
- #endif
1408
1391
#endif
1409
1392
}
1410
1393
0 commit comments