WIP - many problems.

python · tim-one · May 25, 2024 · May 23, 2024 · May 24, 2024 · May 24, 2024
commit e77d0fe0c7d87dcc27242e0dc704bb4ffa698573
diff --git a/Lib/difflib.py b/Lib/difflib.py
@@ -908,21 +908,16 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
         + abcdefGhijkl
         ?    ^  ^  ^
         """
-        from operator import ge, gt
-        # Don't synch up unless the lines have a similarity score of at
-        # least cutoff; best_ratio tracks the best score seen so far.
-        # Keep track of all index pairs achieving the best ratio and
-        # deal with them here. Previously only the smallest pair was
-        # handled here, and if there are many pairs with the best ratio,
-        # recursion could grow very deep, and runtime cubic. See:
+        # Don't synch up unless the lines have a similarity score above
+        # cutoff. Previously only the smallest pair was handled here,
+        # and if there are many pairs with the best ratio, recursion
+        # could grow very deep, and runtime cubic. See:
         # https://github.com/python/cpython/issues/119105
-        best_ratio, cutoff = 0.74, 0.75
+        cutoff = 0.74999
         cruncher = SequenceMatcher(self.charjunk)
-        eqi, eqj = None, None   # 1st indices of equal lines (if any)
         # List of index pairs achieving best_ratio. Strictly increasing
         # in both index positions.
         max_pairs = []
-        maxi = -1 # `i` index of last pair in max_pairs
 
         # search for the pair that matches best without being identical
         # (identical lines must be junk lines, & we don't want to synch
@@ -935,52 +930,39 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
             cruncher.set_seq2(bj)
             # Find new best, if possible. Else search for the smallest i
             # (if any) > maxi that equals the best ratio
-            search_equal = True
+            best_ratio = cutoff
+            best_i = None
             for i in range(alo, ahi):
                 ai = a[i]
                 if ai == bj:
-                    if eqi is None:
-                        eqi, eqj = i, j
+                    if best_i is None:
+                        best_i = i
                     continue
                 cruncher.set_seq1(ai)
                 # computing similarity is expensive, so use the quick
                 # upper bounds first -- have seen this speed up messy
                 # compares by a factor of 3.
-                cmp = ge if search_equal and i > maxi else gt
-                if (cmp(crqr(), best_ratio)
-                      and cmp(cqr(), best_ratio)
-                      and cmp((ratio := cr()), best_ratio)):
-                    if ratio > best_ratio:
-                        best_ratio = ratio
-                        max_pairs.clear()
-                    else:
-                        assert best_ratio == ratio and search_equal
-                        assert i > maxi
-                    max_pairs.append((i, j))
-                    maxi = i
-                    search_equal = False
-        if best_ratio < cutoff:
-            assert not max_pairs
-            # no non-identical "pretty close" pair
-            if eqi is None:
-                # no identical pair either -- treat it as a straight replace
-                yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
-                return
-            # no close pair, but an identical pair -- synch up on that
-            max_pairs = [(eqi, eqj)]
-        else:
-            # there's a close pair, so forget the identical pair (if any)
-            assert max_pairs
-            eqi = None
-
+                if (crqr() > best_ratio
+                      and cqr() > best_ratio
+                      and (ratio := cr()) > best_ratio):
+                    best_ratio = ratio
+                    best_i = i
+            if best_i is not None:
+                #assert not max_pairs or best_i > max_pairs[-1][0], (max_pairs, i, j)
+                if not max_pairs or best_i > max_pairs[-1][0]:
+                    max_pairs.append((best_i, j))
+        if not max_pairs:
+            yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
+
+        print(max_pairs)
         last_i, last_j = alo, blo
         for this_i, this_j in max_pairs:
             # pump out diffs from before the synch point
             yield from self._fancy_helper(a, last_i, this_i,
                                           b, last_j, this_j)
             # do intraline marking on the synch pair
             aelt, belt = a[this_i], b[this_j]
-            if eqi is None:
+            if aelt != belt:
                 # pump out a '-', '?', '+', '?' quad for the synched lines
                 atags = btags = ""
                 cruncher.set_seqs(aelt, belt)