@@ -225,6 +225,8 @@ static int ins_compl_pum_key(int c);
225
225
static int ins_compl_key2count (int c );
226
226
static void show_pum (int prev_w_wrow , int prev_w_leftcol );
227
227
static unsigned quote_meta (char_u * dest , char_u * str , int len );
228
+ static int strncmp_simd (const char_u * s1 , const char_u * s2 , size_t n );
229
+ static int strnicmp_simd (const char_u * s1 , const char_u * s2 , size_t n );
228
230
229
231
#ifdef FEAT_SPELL
230
232
static void spell_back_to_badword (void );
@@ -798,7 +800,7 @@ ins_compl_add(
798
800
do
799
801
{
800
802
if (!match_at_original_text (match )
801
- && STRNCMP (match -> cp_str .string , str , len ) == 0
803
+ && strncmp_simd (match -> cp_str .string , str , len ) == 0
802
804
&& ((int )match -> cp_str .length <= len
803
805
|| match -> cp_str .string [len ] == NUL ))
804
806
return NOTDONE ;
@@ -899,8 +901,8 @@ ins_compl_equal(compl_T *match, char_u *str, int len)
899
901
if (match -> cp_flags & CP_EQUAL )
900
902
return TRUE;
901
903
if (match -> cp_flags & CP_ICASE )
902
- return STRNICMP (match -> cp_str .string , str , (size_t )len ) == 0 ;
903
- return STRNCMP (match -> cp_str .string , str , (size_t )len ) == 0 ;
904
+ return strnicmp_simd (match -> cp_str .string , str , (size_t )len ) == 0 ;
905
+ return strncmp_simd (match -> cp_str .string , str , (size_t )len ) == 0 ;
904
906
}
905
907
906
908
/*
@@ -1243,6 +1245,207 @@ ins_compl_fuzzy_cmp(const void *a, const void *b)
1243
1245
return sa == sb ? (ia == ib ? 0 : (ia < ib ? -1 : 1 )) : (sa < sb ? 1 : -1 );
1244
1246
}
1245
1247
1248
+ /*
1249
+ * strncmp_simd: Compare up to n bytes of two strings (s1 and s2) using SIMD.
1250
+ * Falls back to default strncmp_simd if SIMD is not supported or unavailable.
1251
+ */
1252
+ static int
1253
+ strncmp_simd (const char_u * s1 , const char_u * s2 , size_t n )
1254
+ {
1255
+ if (n == 0 )
1256
+ return 0 ;
1257
+
1258
+ #ifdef PLATFORM_NO_SIMD
1259
+ return STRNCMP (s1 , s2 , n );
1260
+ #endif
1261
+
1262
+ size_t i = 0 ;
1263
+
1264
+ #ifdef PLATFORM_X86
1265
+ const size_t simd_width = 16 ; // SSE2 processes 16 bytes at a time
1266
+ while (n >= simd_width )
1267
+ {
1268
+ __m128i v1 = _mm_loadu_si128 ((__m128i * )(s1 + i ));
1269
+ __m128i v2 = _mm_loadu_si128 ((__m128i * )(s2 + i ));
1270
+ __m128i cmp = _mm_cmpeq_epi8 (v1 , v2 );
1271
+
1272
+ // Generate a mask of unequal bytes
1273
+ int mask = _mm_movemask_epi8 (cmp );
1274
+ if (mask != 0xFFFF )
1275
+ { // Not all bytes are equal
1276
+ for (size_t j = 0 ; j < simd_width ; ++ j )
1277
+ {
1278
+ if ((unsigned char )s1 [i + j ] != (unsigned char )s2 [i + j ])
1279
+ return (unsigned char )s1 [i + j ] - (unsigned char )s2 [i + j ];
1280
+ if (s1 [i + j ] == '\0' || s2 [i + j ] == '\0' )
1281
+ return 0 ; // End of string
1282
+ }
1283
+ }
1284
+
1285
+ n -= simd_width ;
1286
+ i += simd_width ;
1287
+ }
1288
+
1289
+ #elif defined(PLATFORM_ARM_NEON )
1290
+ const size_t simd_width = 16 ; // NEON processes 16 bytes at a time
1291
+ while (n >= simd_width )
1292
+ {
1293
+ uint8x16_t v1 = vld1q_u8 ((uint8_t * )(s1 + i ));
1294
+ uint8x16_t v2 = vld1q_u8 ((uint8_t * )(s2 + i ));
1295
+ uint8x16_t cmp = vceqq_u8 (v1 , v2 );
1296
+
1297
+ // Check if all bytes are equal
1298
+ uint64_t high = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 1 );
1299
+ uint64_t low = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 0 );
1300
+ if ((high != ~0ULL ) || (low != ~0ULL ))
1301
+ {
1302
+ for (size_t j = 0 ; j < simd_width ; ++ j )
1303
+ {
1304
+ if ((unsigned char )s1 [i + j ] != (unsigned char )s2 [i + j ])
1305
+ return (unsigned char )s1 [i + j ] - (unsigned char )s2 [i + j ];
1306
+ if (s1 [i + j ] == '\0' || s2 [i + j ] == '\0' )
1307
+ return 0 ; // End of string
1308
+ }
1309
+ }
1310
+
1311
+ n -= simd_width ;
1312
+ i += simd_width ;
1313
+ }
1314
+ #endif
1315
+
1316
+ // Handle remaining bytes (non-SIMD path)
1317
+ for (; i < n ; ++ i )
1318
+ {
1319
+ if ((unsigned char )s1 [i ] != (unsigned char )s2 [i ])
1320
+ return (unsigned char )s1 [i ] - (unsigned char )s2 [i ];
1321
+ if (s1 [i ] == '\0' || s2 [i ] == '\0' )
1322
+ return 0 ;
1323
+ }
1324
+
1325
+ return 0 ;
1326
+ }
1327
+
1328
+ #ifdef PLATFORM_X86
1329
+ static inline __m128i simd_tolower (__m128i r ) {
1330
+ __m128i lower_bound = _mm_set1_epi8 ('A' - 1 );
1331
+ __m128i upper_bound = _mm_set1_epi8 ('Z' + 1 );
1332
+ __m128i is_upper = _mm_and_si128 (
1333
+ _mm_cmpgt_epi8 (r , lower_bound ),
1334
+ _mm_cmplt_epi8 (r , upper_bound )
1335
+ );
1336
+ __m128i mask = _mm_and_si128 (is_upper , _mm_set1_epi8 (32 ));
1337
+ return _mm_add_epi8 (r , mask );
1338
+ }
1339
+ #elif defined(PLATFORM_ARM_NEON )
1340
+ static inline uint8x16_t simd_tolower (uint8x16_t r ) {
1341
+ uint8x16_t lower_bound = vdupq_n_u8 ('A' );
1342
+ uint8x16_t upper_bound = vdupq_n_u8 ('Z' );
1343
+ uint8x16_t is_upper = vandq_u8 (
1344
+ vcgeq_u8 (r , lower_bound ),
1345
+ vcleq_u8 (r , upper_bound )
1346
+ );
1347
+ uint8x16_t mask = vandq_u8 (is_upper , vdupq_n_u8 (32 ));
1348
+ return vaddq_u8 (r , mask );
1349
+ }
1350
+ #endif
1351
+
1352
+ static int
1353
+ strnicmp_simd (const char_u * s1 , const char_u * s2 , size_t n )
1354
+ {
1355
+ if (n == 0 )
1356
+ return 0 ;
1357
+
1358
+ // Handle multi-byte characters
1359
+ if (has_mbyte )
1360
+ return STRNICMP (s1 , s2 , n );
1361
+
1362
+ #ifdef PLATFORM_NO_SIMD
1363
+ return STRNICMP (s1 , s2 , n );
1364
+ #endif
1365
+
1366
+ // SIMD path for single-byte characters
1367
+ #ifdef PLATFORM_X86
1368
+ const size_t simd_width = 16 ;
1369
+ size_t i = 0 , j = 0 ;
1370
+ while (n - i >= simd_width )
1371
+ {
1372
+ __m128i v1 = _mm_loadu_si128 ((__m128i * )(s1 + i ));
1373
+ __m128i v2 = _mm_loadu_si128 ((__m128i * )(s2 + i ));
1374
+
1375
+ // Convert to lowercase
1376
+ v1 = simd_tolower (v1 );
1377
+ v2 = simd_tolower (v2 );
1378
+
1379
+ // Compare
1380
+ __m128i cmp = _mm_cmpeq_epi8 (v1 , v2 );
1381
+ int mask = _mm_movemask_epi8 (cmp );
1382
+ if (mask != 0xFFFF ) // Not all bytes match
1383
+ {
1384
+ for (j = 0 ; j < simd_width ; ++ j )
1385
+ {
1386
+ char c1 = s1 [i + j ] | 0x20 ; // To lower
1387
+ char c2 = s2 [i + j ] | 0x20 ;
1388
+ if (c1 != c2 )
1389
+ return c1 - c2 ;
1390
+ if (c1 == '\0' )
1391
+ return 0 ;
1392
+ }
1393
+ }
1394
+ i += simd_width ;
1395
+ }
1396
+ s1 += i ;
1397
+ s2 += i ;
1398
+ n -= i ;
1399
+
1400
+ #elif defined(PLATFORM_ARM_NEON )
1401
+ const size_t simd_width = 16 ;
1402
+ size_t i = 0 , j = 0 ;
1403
+ while (n - i >= simd_width )
1404
+ {
1405
+ uint8x16_t v1 = vld1q_u8 ((uint8_t * )(s1 + i ));
1406
+ uint8x16_t v2 = vld1q_u8 ((uint8_t * )(s2 + i ));
1407
+
1408
+ // Convert to lowercase
1409
+ v1 = simd_tolower (v1 );
1410
+ v2 = simd_tolower (v2 );
1411
+
1412
+ // Compare
1413
+ uint8x16_t cmp = vceqq_u8 (v1 , v2 );
10669
1414
+ uint64_t high = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 1 );
1415
+ uint64_t low = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 0 );
1416
+ if (high != ~0ULL || low != ~0ULL )
1417
+ {
1418
+ for (j = 0 ; j < simd_width ; ++ j )
1419
+ {
1420
+ char c1 = s1 [i + j ] | 0x20 ; // To lower
1421
+ char c2 = s2 [i + j ] | 0x20 ;
1422
+ if (c1 != c2 )
1423
+ return c1 - c2 ;
1424
+ if (c1 == '\0' )
1425
+ return 0 ;
1426
+ }
1427
+ }
1428
+ i += simd_width ;
1429
+ }
1430
+ s1 += i ;
1431
+ s2 += i ;
1432
+ n -= i ;
1433
+ #endif
1434
+
1435
+ // Fallback for remaining bytes
1436
+ for (i = 0 ; i < n ; ++ i )
1437
+ {
1438
+ char c1 = s1 [i ] | 0x20 ; // To lower
1439
+ char c2 = s2 [i ] | 0x20 ;
1440
+ if (c1 != c2 )
1441
+ return c1 - c2 ;
1442
+ if (c1 == '\0' )
1443
+ return 0 ;
1444
+ }
1445
+
1446
+ return 0 ;
1447
+ }
1448
+
1246
1449
/*
1247
1450
* Build a popup menu to show the completion matches.
1248
1451
* Returns the popup menu entry that should be selected. Returns -1 if nothing
0 commit comments