@@ -287,7 +287,6 @@ STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
287
287
return cut ;
288
288
}
289
289
290
- #define USE_TABLE
291
290
#define SHIFT_TYPE uint16_t
292
291
#define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
293
292
#define SHIFT_OVERFLOW (NOT_FOUND - 1U)
@@ -326,7 +325,8 @@ STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
326
325
shift = SHIFT_OVERFLOW ;
327
326
}
328
327
p -> table [needle [i ] & TABLE_MASK ] = Py_SAFE_DOWNCAST (shift ,
329
- Py_ssize_t , SHIFT_TYPE );
328
+ Py_ssize_t ,
329
+ SHIFT_TYPE );
330
330
}
331
331
}
332
332
@@ -336,116 +336,128 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
336
336
{
337
337
// Crochemore and Perrin's (1991) Two-Way algorithm.
338
338
// See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
339
- const STRINGLIB_CHAR * needle = p -> needle ;
340
339
Py_ssize_t len_needle = p -> len_needle ;
341
340
Py_ssize_t cut = p -> cut ;
342
341
Py_ssize_t period = p -> period ;
343
- LOG ("===== Checking \"%s\" in \"%s\". =====\n" , needle , haystack );
342
+ const STRINGLIB_CHAR * needle = p -> needle ;
343
+ const STRINGLIB_CHAR * window = haystack ;
344
+ const STRINGLIB_CHAR * last_window = haystack + len_haystack - len_needle ;
345
+ LOG ("===== Two-way: \"%s\" in \"%s\". =====\n" , needle , haystack );
344
346
345
347
if (p -> is_periodic ) {
346
348
LOG ("Needle is periodic.\n" );
347
- Py_ssize_t j = 0 ;
348
349
Py_ssize_t memory = 0 ;
349
- while (j <= len_haystack - len_needle ) {
350
+ while (window <= last_window ) {
350
351
Py_ssize_t i = Py_MAX (cut , memory );
351
352
352
353
// Visualize the line-up:
353
354
LOG ("> " ); LOG_STRING (haystack , len_haystack );
354
- LOG ("\n> " ); LOG ("%*s" , j , "" ); LOG_STRING (needle , len_needle );
355
- LOG ("\n> " ); LOG ("%*s" , j + i , "" ); LOG (" ^ <-- cut\n" );
355
+ LOG ("\n> " ); LOG ("%*s" , window - haystack , "" );
356
+ LOG_STRING (needle , len_needle );
357
+ LOG ("\n> " ); LOG ("%*s" , window - haystack + i , "" );
358
+ LOG (" ^ <-- cut\n" );
356
359
357
- if (haystack [ j + i ] != needle [i ++ ]) {
360
+ if (window [ i ] != needle [i ++ ]) {
358
361
// Sunday's trick: if we're going to jump, we might
359
362
// as well jump to line up the character *after* the
360
363
// current window.
361
- STRINGLIB_CHAR first_outside = haystack [ j + len_needle ];
364
+ STRINGLIB_CHAR first_outside = window [ len_needle ];
362
365
SHIFT_TYPE shift = p -> table [first_outside & TABLE_MASK ];
363
366
if (shift == NOT_FOUND ) {
364
- LOG ("\"%c\" not found. Skipping entirely.\n" , first_outside );
365
- j += len_needle + 1 ;
367
+ LOG ("\"%c\" not found. Skipping entirely.\n" ,
368
+ first_outside );
369
+ window += len_needle + 1 ;
366
370
}
367
371
else {
368
372
LOG ("Shifting to line up \"%c\".\n" , first_outside );
369
- j += shift ;
373
+ window += shift ;
370
374
}
371
375
memory = 0 ;
372
376
continue ;
373
377
}
374
378
375
- while (i < len_needle && needle [i ] == haystack [ j + i ]) {
379
+ while (i < len_needle && needle [i ] == window [ i ]) {
376
380
i ++ ;
377
381
}
378
382
if (i >= len_needle ) {
379
383
LOG ("Right half matches.\n" );
380
384
i = cut - 1 ;
381
- while (i >= memory && needle [i ] == haystack [ j + i ]) {
385
+ while (i >= memory && needle [i ] == window [ i ]) {
382
386
i -- ;
383
387
}
384
388
if (i < memory ) {
385
- LOG ("Left half matches. Returning %d.\n" , j );
386
- return j ;
389
+ LOG ("Left half matches. Returning %d.\n" ,
390
+ window - haystack );
391
+ return window - haystack ;
387
392
}
388
- LOG ("Left half does not match. Jump ahead by period %d.\n" , period );
389
- j += period ;
393
+ LOG ("Left half does not match. Jump ahead by period %d.\n" ,
394
+ period );
395
+ window += period ;
390
396
memory = len_needle - period ;
391
397
}
392
398
else {
393
- LOG ("Right half does not match. Jump ahead by %d.\n" , i - cut + 1 );
394
- j += i - cut + 1 ;
399
+ LOG ("Right half does not match. Jump ahead by %d.\n" ,
400
+ i - cut + 1 );
401
+ window += i - cut + 1 ;
395
402
memory = 0 ;
396
403
}
397
404
}
398
405
}
399
406
else {
400
407
period = Py_MAX (cut , len_needle - cut ) + 1 ;
401
408
LOG ("Needle is not periodic.\n" );
402
- Py_ssize_t j = 0 ;
403
409
assert (cut < len_needle );
404
410
STRINGLIB_CHAR needle_cut = needle [cut ];
405
- while (j <= len_haystack - len_needle ) {
411
+ while (window <= last_window ) {
406
412
407
413
// Visualize the line-up:
408
414
LOG ("> " ); LOG_STRING (haystack , len_haystack );
409
- LOG ("\n> " ); LOG ("%*s" , j , "" ); LOG_STRING (needle , len_needle );
410
- LOG ("\n> " ); LOG ("%*s" , j + cut , "" ); LOG (" ^ <-- cut\n" );
415
+ LOG ("\n> " ); LOG ("%*s" , window - haystack , "" );
416
+ LOG_STRING (needle , len_needle );
417
+ LOG ("\n> " ); LOG ("%*s" , window - haystack + cut , "" );
418
+ LOG (" ^ <-- cut\n" );
411
419
412
- if (haystack [ j + cut ] != needle_cut ) {
420
+ if (window [ cut ] != needle_cut ) {
413
421
// Sunday's trick: if we're going to jump, we might
414
422
// as well jump to line up the character *after* the
415
423
// current window.
416
- STRINGLIB_CHAR first_outside = haystack [ j + len_needle ];
424
+ STRINGLIB_CHAR first_outside = window [ len_needle ];
417
425
SHIFT_TYPE shift = p -> table [first_outside & TABLE_MASK ];
418
426
if (shift == NOT_FOUND ) {
419
- LOG ("\"%c\" not found. Skipping entirely.\n" , first_outside );
420
- j += len_needle + 1 ;
427
+ LOG ("\"%c\" not found. Skipping entirely.\n" ,
428
+ first_outside );
429
+ window += len_needle + 1 ;
421
430
}
422
431
else {
423
432
LOG ("Shifting to line up \"%c\".\n" , first_outside );
424
- j += shift ;
433
+ window += shift ;
425
434
}
426
435
continue ;
427
436
}
428
437
429
438
Py_ssize_t i = cut + 1 ;
430
- while (i < len_needle && needle [i ] == haystack [ j + i ]) {
439
+ while (i < len_needle && needle [i ] == window [ i ]) {
431
440
i ++ ;
432
441
}
433
442
if (i >= len_needle ) {
434
443
LOG ("Right half matches.\n" );
435
444
i = cut - 1 ;
FEEE
div>
436
- while (i >= 0 && needle [i ] == haystack [ j + i ]) {
445
+ while (i >= 0 && needle [i ] == window [ i ]) {
437
446
i -- ;
438
447
}
439
448
if (i < 0 ){
440
- LOG ("Left half matches. Returning %d.\n" , j );
441
- return j ;
449
+ LOG ("Left half matches. Returning %d.\n" ,
450
+ window - haystack );
451
+ return window - haystack ;
442
452
}
443
- LOG ("Left half does not match. Advance by period %d.\n" , period );
444
- j += period ;
453
+ LOG ("Left half does not match. Advance by period %d.\n" ,
454
+ period );
455
+ window += period ;
445
456
}
446
457
else {
447
- LOG ("Right half does not match. Advance by %d.\n" , i - cut + 1 );
448
- j += i - cut + 1 ;
458
+ LOG ("Right half does not match. Advance by %d.\n" ,
459
+ i - cut + 1 );
460
+ window += i - cut + 1 ;
449
461
}
450
462
}
451
463
}
@@ -459,7 +471,7 @@ STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
459
471
const STRINGLIB_CHAR * needle ,
460
472
Py_ssize_t len_needle )
461
473
{
462
- LOG ("##### Counting \"%s\" in \"%s\".\n" , needle , haystack );
474
+ LOG ("###### Finding \"%s\" in \"%s\".\n" , needle , haystack );
463
475
Py_ssize_t index ;
464
476
index = STRINGLIB (find_char )(haystack ,
465
477
len_haystack - len_needle + 1 ,
0 commit comments