@@ -911,33 +911,47 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
911
911
912
912
# don't synch up unless the lines have a similarity score of at
913
913
# least cutoff; best_ratio tracks the best score seen so far
914
- best_ratio , cutoff = 0.74 , 0.75
914
+ # best_ratio is a tuple storing the best .ratio() seen so far, and
915
+ # a measure of how far the indices are from their index range
916
+ # midpoints. The latter is used to resolve ratio ties. Favoring
917
+ # indices near the midpoints tends to cut the ranges in half. Else,
918
+ # if there are many pairs with the best ratio, recursion can grow
919
+ # very deep, and runtime becomes cubic. See:
920
+ # https://github.com/python/cpython/issues/119105
921
+ best_ratio , cutoff = (0.74 , 0 ), 0.75
915
922
cruncher = SequenceMatcher (self .charjunk )
916
923
eqi , eqj = None , None # 1st indices of equal lines (if any)
917
924
918
925
# search for the pair that matches best without being identical
919
926
# (identical lines must be junk lines, & we don't want to synch up
920
927
# on junk -- unless we have to)
928
+ amid = (alo + ahi - 1 ) / 2
929
+ bmid = (blo + bhi - 1 ) / 2
921
930
for j in range (blo , bhi ):
922
931
bj = b [j ]
923
932
cruncher .set_seq2 (bj )
933
+ weight_j = - abs (j - bmid )
924
934
for i in range (alo , ahi ):
925
935
ai = a [i ]
926
936
if ai == bj :
927
937
if eqi is None :
928
938
eqi , eqj = i , j
929
939
continue
930
940
cruncher .set_seq1 (ai )
941
+ # weight is used to balance the recursion by prioritizing
942
+ # i and j in the middle of their ranges
943
+ weight = weight_j - abs (i - amid )
931
944
# computing similarity is expensive, so use the quick
932
945
# upper bounds first -- have seen this speed up messy
933
946
# compares by a factor of 3.
934
947
# note that ratio() is only expensive to compute the first
935
948
# time it's called on a sequence pair; the expensive part
936
949
# of the computation is cached by cruncher
937
- if cruncher .real_quick_ratio () > best_ratio and \
938
- cruncher .quick_ratio () > best_ratio and \
939
- cruncher .ratio () > best_ratio :
940
- best_ratio , best_i , best_j = cruncher .ratio (), i , j
950
+ if (cruncher .real_quick_ratio (), weight ) > best_ratio and \
951
+ (cruncher .quick_ratio (), weight ) > best_ratio and \
952
+ (cruncher .ratio (), weight ) > best_ratio :
953
+ best_ratio , best_i , best_j = (cruncher .ratio (), weight ), i , j
954
+ best_ratio , _ = best_ratio
941
955
if best_ratio < cutoff :
942
956
# no non-identical "pretty close" pair
943
957
if eqi is None :
0 commit comments