@@ -225,6 +225,8 @@ static int ins_compl_pum_key(int c);
225
225
static int ins_compl_key2count (int c );
226
226
static void show_pum (int prev_w_wrow , int prev_w_leftcol );
227
227
static unsigned quote_meta (char_u * dest , char_u * str , int len );
228
+ static int strncmp_simd (const char_u * s1 , const char_u * s2 , size_t n );
229
+ static int strnicmp_simd (const char_u * s1 , const char_u * s2 , size_t n );
228
230
229
231
#ifdef FEAT_SPELL
230
232
static void spell_back_to_badword (void );
@@ -798,7 +800,7 @@ ins_compl_add(
798
800
do
799
801
{
800
802
if (!match_at_original_text (match )
801
- && STRNCMP (match -> cp_str .string , str , len ) == 0
803
+ && strncmp_simd (match -> cp_str .string , str , len ) == 0
802
804
&& ((int )match -> cp_str .length <= len
803
805
|| match -> cp_str .string [len ] == NUL ))
804
806
return NOTDONE ;
@@ -899,8 +901,8 @@ ins_compl_equal(compl_T *match, char_u *str, int len)
899
901
if (match -> cp_flags & CP_EQUAL )
900
902
return TRUE;
901
903
if (match -> cp_flags & CP_ICASE )
902
- return STRNICMP (match -> cp_str .string , str , (size_t )len ) == 0 ;
903
- return STRNCMP (match -> cp_str .string , str , (size_t )len ) == 0 ;
904
+ return strnicmp_simd (match -> cp_str .string , str , (size_t )len ) == 0 ;
905
+ return strncmp_simd (match -> cp_str .string , str , (size_t )len ) == 0 ;
904
906
}
905
907
906
908
/*
@@ -1243,6 +1245,209 @@ ins_compl_fuzzy_cmp(const void *a, const void *b)
1243
1245
return sa == sb ? (ia == ib ? 0 : (ia < ib ? -1 : 1 )) : (sa < sb ? 1 : -1 );
1244
1246
}
1245
1247
1248
+ /*
1249
+ * strncmp_simd: Compare up to n bytes of two strings (s1 and s2) using SIMD.
1250
+ * Falls back to default strncmp_simd if SIMD is not supported or unavailable.
1251
+ */
1252
+ static int
1253
+ strncmp_simd (const char_u * s1 , const char_u * s2 , size_t n )
1254
+ {
1255
+ if (n == 0 )
1256
+ return 0 ;
1257
+
1258
+ #ifdef PLATFORM_NO_SIMD
1259
+ return STRNCMP (s1 , s2 , n );
1260
+ #endif
1261
+
1262
+ size_t i = 0 ;
1263
+
1264
+ #ifdef PLATFORM_X86
1265
+ const size_t simd_width = 16 ; // SSE2 processes 16 bytes at a time
1266
+ while (n >= simd_width )
1267
+ {
1268
+ __m128i v1 = _mm_loadu_si128 ((__m128i * )(s1 + i ));
1269
+ __m128i v2 = _mm_loadu_si128 ((__m128i * )(s2 + i ));
1270
+ __m128i cmp = _mm_cmpeq_epi8 (v1 , v2 );
1271
+
1272
+ // Generate a mask of unequal bytes
1273
+ int mask = _mm_movemask_epi8 (cmp );
1274
+ if (mask != 0xFFFF )
1275
+ { // Not all bytes are equal
1276
+ for (size_t j = 0 ; j < simd_width ; ++ j )
1277
+ {
1278
+ if ((unsigned char )s1 [i + j ] != (unsigned char )s2 [i + j ])
1279
+ return (unsigned char )s1 [i + j ] - (unsigned char )s2 [i + j ];
1280
+ if (s1 [i + j ] == '\0' || s2 [i + j ] == '\0' )
1281
+ return 0 ; // End of string
1282
+ }
1283
+ }
1284
+
1285
+ n -= simd_width ;
1286
+ i += simd_width ;
1287
+ }
1288
+
1289
+ #elif defined(PLATFORM_ARM_NEON )
1290
+ const size_t simd_width = 16 ; // NEON processes 16 bytes at a time
1291
+ while (n >= simd_width )
1292
+ {
1293
+ uint8x16_t v1 = vld1q_u8 ((uint8_t * )(s1 + i ));
1294
+ uint8x16_t v2 = vld1q_u8 ((uint8_t * )(s2 + i ));
1295
+ uint8x16_t cmp = vceqq_u8 (v1 , v2 );
1296
+
1297
+ // Check if all bytes are equal
1298
+ uint64_t high = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 1 );
1299
+ uint64_t low = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 0 );
1300
+ if ((high != ~0ULL ) || (low != ~0ULL ))
1301
+ {
1302
+ for (size_t j = 0 ; j < simd_width ; ++ j )
1303
+ {
1304
+ if ((unsigned char )s1 [i + j ] != (unsigned char )s2 [i + j ])
1305
+ return (unsigned char )s1 [i + j ] - (unsigned char )s2 [i + j ];
1306
+ if (s1 [i + j ] == '\0' || s2 [i + j ] == '\0' )
1307
+ return 0 ; // End of string
1308
+ }
1309
+ }
1310
+
1311
+ n -= simd_width ;
1312
+ i += simd_width ;
1313
+ }
1314
+ #endif
1315
+
1316
+ // Handle remaining bytes (non-SIMD path)
1317
+ for (; i < n ; ++ i )
1318
+ {
1319
+ if ((unsigned char )s1 [i ] != (unsigned char )s2 [i ])
1320
+ return (unsigned char )s1 [i ] - (unsigned char )s2 [i ];
1321
+ if (s1 [i ] == '\0' || s2 [i ] == '\0' )
1322
+ return 0 ;
1323
+ }
1324
+
1325
+ return 0 ;
1326
+ }
1327
+
1328
+ #ifdef PLATFORM_X86
1329
+ static inline __m128i simd_tolower (__m128i r )
1330
+ {
1331
+ __m128i lower_bound = _mm_set1_epi8 ('A' - 1 );
1332
+ __m128i upper_bound = _mm_set1_epi8 ('Z' + 1 );
1333
+ __m128i is_upper = _mm_and_si128 (
1334
+ _mm_cmpgt_epi8 (r , lower_bound ),
1335
+ _mm_cmplt_epi8 (r , upper_bound )
1336
+ );
1337
+ __m128i mask = _mm_and_si128 (is_upper , _mm_set1_epi8 (32 ));
1338
+ return _mm_add_epi8 (r , mask );
1339
+ }
1340
+ #elif defined(PLATFORM_ARM_NEON )
1341
+ static inline uint8x16_t simd_tolower (uint8x16_t r )
1342
+ {
1343
+ uint8x16_t lower_bound = vdupq_n_u8 ('A' );
1344
+ uint8x16_t upper_bound = vdupq_n_u8 ('Z' );
1345
+ uint8x16_t is_upper = vandq_u8 (
1346
+ vcgeq_u8 (r , lower_bound ),
1347
+ vcleq_u8 (r , upper_bound )
1348
+ );
1349
+ uint8x16_t mask = vandq_u8 (is_upper , vdupq_n_u8 (32 ));
1350
+ return vaddq_u8 (r , mask );
1351
+ }
1352
+ #endif
1353
+
1354
+ static int
1355
+ strnicmp_simd (const char_u * s1 , const char_u * s2 , size_t n )
1356
+ {
1357
+ if (n == 0 )
1358
+ return 0 ;
1359
+
1360
+ // Handle multi-byte characters
1361
+ if (has_mbyte )
1362
+ return STRNICMP (s1 , s2 , n );
1363
+
1364
+ #ifdef PLATFORM_NO_SIMD
1365
+ return STRNICMP (s1 , s2 , n );
1366
+ #endif
1367
+
1368
+ // SIMD path for single-byte characters
1369
+ #ifdef PLATFORM_X86
1370
+ const size_t simd_width = 16 ;
1371
+ size_t i = 0 , j = 0 ;
1372
+ while (n - i >= simd_width )
1373
+ {
1374
+ __m128i v1 = _mm_loadu_si128 ((__m128i * )(s1 + i ));
1375
+ __m128i v2 = _mm_loadu_si128 ((__m128i * )(s2 + i ));
1376
+
1377
+ // Convert to lowercase
1378
+ v1 = simd_tolower (v1 );
1379
+ v2 = simd_tolower (v2 );
1380
+
1381
+ // Compare
1382
+ __m128i cmp = _mm_cmpeq_epi8 (v1 , v2 );
1383
+ int mask = _mm_movemask_epi8 (cmp );
1384
+ if (mask != 0xFFFF ) // Not all bytes match
1385
+ {
1386
+ for (j = 0 ; j < simd_width ; ++ j )
1387
+ {
1388
+ char c1 = s1 [i + j ] | 0x20 ; // To lower
1389
+ char c2 = s2 [i + j ] | 0x20 ;
1390
+ if (c1 != c2 )
1391
+ return c1 - c2 ;
1392
+ if (c1 == '\0' )
1393
+ return 0 ;
1394
+ }
1395
+ }
1396
+ i += simd_width ;
1397
+ }
1398
+ s1 += i ;
1399
+ s2 += i ;
1400
+ n -= i ;
1401
+
1402
+ #elif defined(PLATFORM_ARM_NEON )
1403
+ const size_t simd_width = 16 ;
1404
+ size_t i = 0 , j = 0 ;
1405
+ while (n - i >= simd_width )
1406
+ {
1407
+ uint8x16_t v1 = vld1q_u8 ((uint8_t * )(s1 + i ));
1408
+ uint8x16_t v2 = vld1q_u8 ((uint8_t * )(s2 + i ));
1409
+
1410
+ // Convert to lowercase
1411
+ v1 = simd_tolower (v1 );
1412
+ v2 = simd_tolower (v2 );
1413
+
1414
+ // Compare
1415
+ uint8x16_t cmp = vceqq_u8 (v1 , v2 );
1416
+ uint64_t high = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 1 );
1417
+ uint64_t low = vgetq_lane_u64 (vreinterpretq_u64_u8 (cmp ), 0 );
1418
+ if (high != ~0ULL || low != ~0ULL )
1419
+ {
1420
+ for (j = 0 ; j < simd_width ; ++ j )
1421
+ {
1422
+ char c1 = s1 [i + j ] | 0x20 ; // To lower
1423
+ char c2 = s2 [i + j ] | 0x20 ;
1424
+ if (c1 != c2 )
1425
+ return c1 - c2 ;
1426
+ if (c1 == '\0' )
1427
+ return 0 ;
1428
+ }
1429
+ }
1430
+ i += simd_width ;
1431
+ }
1432
+ s1 += i ;
1433
+ s2 += i ;
1434
+ n -= i ;
1435
+ #endif
1436
+
1437
+ // Fallback for remaining bytes
1438
+ for (i = 0 ; i < n ; ++ i )
1439
+ {
1440
+ char c1 = s1 [i ] | 0x20 ; // To lower
1441
+ char c2 = s2 [i ] | 0x20 ;
1442
+ if (c1 != c2 )
1443
+ return c1 - c2 ;
1444
+ if (c1 == '\0' )
1445
+ return 0 ;
1446
+ }
1447
+
1448
+ return 0 ;
1449
+ }
1450
+
1246
1451
/*
1247
1452
* Build a popup menu to show the completion matches.
1248
1453
* Returns the popup menu entry that should be selected. Returns -1 if nothing
0 commit comments