gh-119105: difflib: improve recursion for degenerate cases by pulkin · Pull Request #119131 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-119105: difflib: improve recursion for degenerate cases #119131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 19, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions Lib/difflib.py
Original file line number Diff line number Diff line change
@@ -911,33 +911,48 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):

# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
best_ratio, cutoff = (0.74, 0), 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)

# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
alen = alo + ahi - 1
blen = blo + bhi - 1
# weight is used to balance the recursion by prioritizing
# i and j in the middle of their ranges
weight = 0
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
if j < blen / 2:
weight += alen
elif j > blen / 2:
weight -= alen
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
if i < alen / 2:
weight += blen
elif i > alen / 2:
weight -= blen
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if (cruncher.real_quick_ratio(), weight) > best_ratio and \
(cruncher.quick_ratio(), weight) > best_ratio and \
(cruncher.ratio(), weight) > best_ratio:
best_ratio, best_i, best_j = (cruncher.ratio(), weight), i, j
# assert weight == 0, weight
best_ratio, _ = best_ratio
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
Expand Down
0