@@ -288,12 +288,11 @@ STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
288
288
}
289
289
290
290
#define SHIFT_TYPE uint16_t
291
- #define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
292
- #define SHIFT_OVERFLOW (NOT_FOUND - 1U)
291
+ #define MAX_SHIFT ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
293
292
294
293
#define TABLE_SIZE_BITS 7
295
- #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
296
- #define TABLE_MASK (TABLE_SIZE - 1U )
294
+ #define TABLE_SIZE (1 << TABLE_SIZE_BITS)
295
+ #define TABLE_MASK (TABLE_SIZE - 1 )
297
296
298
297
typedef struct STRINGLIB (_pre ) {
299
298
const STRINGLIB_CHAR * needle ;
@@ -316,17 +315,15 @@ STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
316
315
needle + p -> period ,
317
316
p -> cut * STRINGLIB_SIZEOF_CHAR ));
318
317
// Now fill up a table
319
- memset (& (p -> table [0 ]), 0xff , TABLE_SIZE * sizeof (SHIFT_TYPE ));
320
- assert (p -> table [0 ] == NOT_FOUND );
321
- assert (p -> table [TABLE_MASK ] == NOT_FOUND );
318
+ SHIFT_TYPE default_shift = Py_SAFE_DOWNCAST (Py_MIN (len_needle , MAX_SHIFT ),
319
+ Py_ssize_t , SHIFT_TYPE );
320
+ for (int i = 0 ; i < TABLE_SIZE ; i ++ ) {
321
+ p -> table [i ] = default_shift ;
322
+ }
322
323
for (Py_ssize_t i = 0 ; i < len_needle ; i ++ ) {
323
- Py_ssize_t shift = len_needle - i ;
324
- if (shift > SHIFT_OVERFLOW ) {
325
- shift = SHIFT_OVERFLOW ;
326
- }
327
- p -> table [needle [i ] & TABLE_MASK ] = Py_SAFE_DOWNCAST (shift ,
328
- Py_ssize_t ,
329
- SHIFT_TYPE );
324
+ Py_ssize_t shift = Py_MIN (MAX_SHIFT , len_needle - i );
325
+ int index = needle [i ] & TABLE_MASK ;
326
+ p -> table [index ] = Py_SAFE_DOWNCAST (shift , Py_ssize_t , SHIFT_TYPE );
330
327
}
331
328
}
332
329
@@ -342,6 +339,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
342
339
const STRINGLIB_CHAR * needle = p -> needle ;
343
340
const STRINGLIB_CHAR * window = haystack ;
344
341
const STRINGLIB_CHAR * last_window = haystack + len_haystack - len_needle ;
342
+ SHIFT_TYPE * table = p -> table ;
345
343
LOG ("===== Two-way: \"%s\" in \"%s\". =====\n" , needle , haystack );
346
344
347
345
if (p -> is_periodic ) {
@@ -362,16 +360,9 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
362
360
// as well jump to line up the character *after* the
363
361
// current window.
364
362
STRINGLIB_CHAR first_outside = window [len_needle ];
365
- SHIFT_TYPE shift = p -> table [first_outside & TABLE_MASK ];
366
- if (shift == NOT_FOUND ) {
367
- LOG ("\"%c\" not found. Skipping entirely.\n" ,
368
- first_outside );
369
- window += len_needle + 1 ;
370
- }
371
- else {
372
- LOG ("Shifting to line up \"%c\".\n" , first_outside );
373
- window += shift ;
374
- }
363
+ SHIFT_TYPE shift = table [first_outside & TABLE_MASK ];
364
+ LOG ("Shifting to line up \"%c\".\n" , first_outside );
365
+ window += shift ;
375
366
memory = 0 ;
376
367
continue ;
377
368
}
@@ -423,16 +414,9 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
423
414
// as well jump to line up the character *after* the
424
415
// current window.
425
416
STRINGLIB_CHAR first_outside = window [len_needle ];
426
- SHIFT_TYPE shift = p -> table [first_outside & TABLE_MASK ];
427
- if (shift == NOT_FOUND ) {
428
- LOG ("\"%c\" not found. Skipping entirely.\n" ,
429
- first_outside );
430
- window += len_needle + 1 ;
431
- }
432
- else {
433
- LOG ("Shifting to line up \"%c\".\n" , first_outside );
434
- window += shift ;
435
- }
417
+ SHIFT_TYPE shift = table [first_outside & TABLE_MASK ];
418
+ LOG ("Shifting to line up \"%c\".\n" , first_outside );
419
+ window += shift ;
436
420
continue ;
437
421
}
438
422
@@ -544,8 +528,7 @@ STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
544
528
}
545
529
546
530
#undef SHIFT_TYPE
547
- #undef NOT_FOUND
548
- #undef SHIFT_OVERFLOW
531
+ #undef MAX_SHIFT
549
532
#undef TABLE_SIZE_BITS
550
533
#undef TABLE_SIZE
551
534
#undef TABLE_MASK
@@ -588,7 +571,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
588
571
}
589
572
590
573
mlast = m - 1 ;
591
- skip = mlast - 1 ;
574
+ skip = mlast ;
592
575
mask = 0 ;
593
576
594
577
if (mode != FAST_RSEARCH ) {
0 commit comments