From 20fde315b282b2853cde68a78c7502f4df942cad Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Mon, 17 Dec 2018 20:04:21 -0300 Subject: [PATCH 01/27] Fix bpo-19217 --- Lib/difflib.py | 6 +++++- Lib/unittest/case.py | 8 +++++--- Lib/unittest/test/test_case.py | 6 ++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 887c3c26cae458..337a83d0ee1994 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1187,7 +1187,11 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', first, last = group[0], group[-1] file1_range = _format_range_unified(first[1], last[2]) file2_range = _format_range_unified(first[3], last[4]) - yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) + if isinstance(a, list) or isinstance(a, tuple) and \ + isinstance(b, list) or isinstance(b, tuple): + yield '{}'.format(lineterm) + else: + yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) for tag, i1, i2, j1, j2 in group: if tag == 'equal': diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index a157ae8a14bcbe..f6a23afb01ee1d 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -1057,9 +1057,11 @@ def assertSequenceEqual(self, seq1, seq2, msg=None, seq_type=None): differing += ('Unable to index element %d ' 'of second %s\n' % (len1, seq_type_name)) standardMsg = differing - diffMsg = '\n' + '\n'.join( - difflib.ndiff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines())) + diffMsg = difflib.unified_diff(pprint.pformat(seq1).splitlines(), + pprint.pformat(seq2).splitlines(), + fromfile='expected', tofile='got', + lineterm='') + diffMsg = '\n' + '\n'.join(diffMsg) standardMsg = self._truncateMessage(standardMsg, diffMsg) msg = self._formatMessage(msg, standardMsg) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index 687fe5b65f109e..b11da49b362dce 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -789,8 
+789,10 @@ def testAssertSequenceEqualMaxDiff(self): self.assertEqual(self.maxDiff, 80*8) seq1 = 'a' + 'x' * 80**2 seq2 = 'b' + 'x' * 80**2 - diff = '\n'.join(difflib.ndiff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines())) + diff = difflib.unified_diff(pprint.pformat(seq1).splitlines(), + pprint.pformat(seq2).splitlines(), + lineterm='') + diff = '\n'.join(diff) # the +1 is the leading \n added by assertSequenceEqual omitted = unittest.case.DIFF_OMITTED % (len(diff) + 1,) From 6996d8a345a5e22768fbc299277fd133666a458b Mon Sep 17 00:00:00 2001 From: eamanu Date: Mon, 17 Dec 2018 23:05:36 -0300 Subject: [PATCH 02/27] fix unnecessary space and solve @gpshead comments --- Lib/difflib.py | 5 ++--- Lib/unittest/case.py | 6 +++--- Lib/unittest/test/test_case.py | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 337a83d0ee1994..0d2c4927847235 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1187,9 +1187,8 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', first, last = group[0], group[-1] file1_range = _format_range_unified(first[1], last[2]) file2_range = _format_range_unified(first[3], last[4]) - if isinstance(a, list) or isinstance(a, tuple) and \ - isinstance(b, list) or isinstance(b, tuple): - yield '{}'.format(lineterm) + if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): + yield lineterm else: yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index f6a23afb01ee1d..8be3af3297559f 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -1058,9 +1058,9 @@ def assertSequenceEqual(self, seq1, seq2, msg=None, seq_type=None): 'of second %s\n' % (len1, seq_type_name)) standardMsg = differing diffMsg = difflib.unified_diff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines(), - fromfile='expected', tofile='got', - lineterm='') + pprint.pformat(seq2).splitlines(), 
+ fromfile='expected', tofile='got', + lineterm='') diffMsg = '\n' + '\n'.join(diffMsg) standardMsg = self._truncateMessage(standardMsg, diffMsg) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index b11da49b362dce..68fbdef7665b9c 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -790,8 +790,8 @@ def testAssertSequenceEqualMaxDiff(self): seq1 = 'a' + 'x' * 80**2 seq2 = 'b' + 'x' * 80**2 diff = difflib.unified_diff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines(), - lineterm='') + pprint.pformat(seq2).splitlines(), + lineterm='') diff = '\n'.join(diff) # the +1 is the leading \n added by assertSequenceEqual omitted = unittest.case.DIFF_OMITTED % (len(diff) + 1,) From adfcf6d2bdd66f13f6ff798fce852e401b3dcb84 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Tue, 18 Dec 2018 09:54:32 -0300 Subject: [PATCH 03/27] fix test --- Lib/unittest/test/test_assertions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/unittest/test/test_assertions.py b/Lib/unittest/test/test_assertions.py index f5e64d68e7b101..14a8f231e1c023 100644 --- a/Lib/unittest/test/test_assertions.py +++ b/Lib/unittest/test/test_assertions.py @@ -242,8 +242,8 @@ def testAssertSequenceEqual(self): # Error messages are multiline so not testing on full message # assertTupleEqual and assertListEqual delegate to this method self.assertMessages('assertSequenceEqual', ([], [None]), - [r"\+ \[None\]$", "^oops$", r"\+ \[None\]$", - r"\+ \[None\] : oops$"]) + [r"\+\[None\]$", "^oops$", r"\+\[None\]$", + r"\+\[None\] : oops$"]) def testAssertSetEqual(self): self.assertMessages('assertSetEqual', (set(), set([None])), From 81900d4aea75aaa3a02d247433eb21193a583f52 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 22 Jul 2021 15:21:18 -0400 Subject: [PATCH 04/27] bpo-19217: fix failing test in unittest's test suite --- Lib/unittest/test/test_case.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index f1ef73fec33a3d..e2a54618b3bb07 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -801,6 +801,7 @@ def testAssertSequenceEqualMaxDiff(self): seq2 = 'b' + 'x' * 80**2 diff = difflib.unified_diff(pprint.pformat(seq1).splitlines(), pprint.pformat(seq2).splitlines(), + fromfile='expected', tofile='got', lineterm='') diff = '\n'.join(diff) # the +1 is the leading \n added by assertSequenceEqual From e5cb7bcebbdd24ab53cab7d1201a5cfad50c85b3 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 22 Jul 2021 15:22:13 -0400 Subject: [PATCH 05/27] bpo-19217: add blurb --- .../next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst diff --git a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst new file mode 100644 index 00000000000000..63431453071b8f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst @@ -0,0 +1,2 @@ +Optimize unittest.TestCase.assertEqual method for long lists of varied +items. 
From 6955d814b31a4b1f11a34b8b516f8cccc8f91966 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 22 Jul 2021 16:22:40 -0400 Subject: [PATCH 06/27] revert changes to difflib.py --- Lib/difflib.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 624d6cfb12487e..0dda80d3875739 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1186,10 +1186,7 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', first, last = group[0], group[-1] file1_range = _format_range_unified(first[1], last[2]) file2_range = _format_range_unified(first[3], last[4]) - if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): - yield lineterm - else: - yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) + yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) for tag, i1, i2, j1, j2 in group: if tag == 'equal': From 46f5e299a9bb6c40586f4da43649238839900557 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 22 Jul 2021 16:36:11 -0400 Subject: [PATCH 07/27] add regression test --- Lib/unittest/test/test_case.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index e2a54618b3bb07..631b7150b5b589 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -8,6 +8,7 @@ import warnings import weakref import inspect +import time import types from copy import deepcopy @@ -838,6 +839,36 @@ def testAssertSequenceEqualMaxDiff(self): self.assertGreater(len(msg), len(diff)) self.assertNotIn(omitted, msg) + def testAssertEqualModeratelyLongSequencePerformance(self): + """Before fixing bpo-19217, assertTrue on different sequences of the + same length would never finish in circumstances like the ones below.""" + + TIME_LIMIT = 1 + + # test comparison of strings + arr1 = 'a' * 10000 + arr2 = 'a' * 9999 + 'b' + start = time.time() + try: + self.assertEqual(arr1, arr2) + except 
self.failureException: + pass + finish = time.time() + if finish - start > TIME_LIMIT: + self.fail('String comparison took longer than one second') + + # test comparison of lists + arr1 = [1] * 10000 + arr2 = ([1] * 9999) + [2] + start = time.time() + try: + self.assertEqual(arr1, arr2) + except self.failureException: + pass + finish = time.time() + if finish - start > TIME_LIMIT: + self.fail('List comparison took longer than one second') + def testTruncateMessage(self): self.maxDiff = 1 message = self._truncateMessage('foo', 'bar') From 856029ded377ac36ede7fefee2c22018ac72b9ce Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Tue, 3 Aug 2021 23:20:57 -0400 Subject: [PATCH 08/27] draft implementation of unittest.case._heuristic_diff * Now, we switch between difflib.ndiff and difflib.unified_diff based on input size. * Threshold is sort of arbitrary, but seems to be working in the limited test cases I've written * The full test suite is passing, with only very minor tweaks to existing tests! --- Lib/unittest/case.py | 41 ++++++- Lib/unittest/test/test_assertions.py | 4 +- Lib/unittest/test/test_case.py | 157 +++++++++++++++++++++------ 3 files changed, 160 insertions(+), 42 deletions(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index b53dde21ca1899..884aa836d0e60a 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -5,6 +5,7 @@ import difflib import pprint import re +from typing import Iterable import warnings import collections import contextlib @@ -153,6 +154,36 @@ def _is_subtype(expected, basetype): return all(_is_subtype(e, basetype) for e in expected) return isinstance(expected, type) and issubclass(expected, basetype) + +def _heuristic_diff(a: list[str], b: list[str]) -> Iterable[str]: + """After testing the magnitude of the inputs, preferably return the output + of difflib.ndiff, but fallback to difflib.unified_diff for prohibitively + expensive inputs. 
How cost is calclated: + + cost = (number of differing lines + * total length of all differing lines) + """ + + # @ambv: I just deduced this number from guess and check.... is there a + # better way? + cost_limit = 1_000_000 + + udiff = [l for l in difflib.unified_diff(a, b, + fromfile='expected', + tofile='got')] + udiff_differing_lines = [l for l in udiff + if l.startswith('-') or l.startswith('+')] + num_difflines = len(udiff_differing_lines) + total_diffline_length = sum(len(l) for l in udiff_differing_lines) + + diff_cost = num_difflines * total_diffline_length + + if diff_cost > cost_limit: + yield from udiff + else: + yield from difflib.ndiff(a, b) + + class _BaseTestCaseContext: def __init__(self, test_case): @@ -1018,10 +1049,8 @@ def assertSequenceEqual(self, seq1, seq2, msg=None, seq_type=None): differing += ('Unable to index element %d ' 'of second %s\n' % (len1, seq_type_name)) standardMsg = differing - diffMsg = difflib.unified_diff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines(), - fromfile='expected', tofile='got', - lineterm='') + diffMsg = _heuristic_diff(pprint.pformat(seq1).splitlines(), + pprint.pformat(seq2).splitlines()) diffMsg = '\n' + '\n'.join(diffMsg) standardMsg = self._truncateMessage(standardMsg, diffMsg) @@ -1133,7 +1162,7 @@ def assertDictEqual(self, d1, d2, msg=None): if d1 != d2: standardMsg = '%s != %s' % _common_shorten_repr(d1, d2) - diff = ('\n' + '\n'.join(difflib.ndiff( + diff = ('\n' + '\n'.join(_heuristic_diff( pprint.pformat(d1).splitlines(), pprint.pformat(d2).splitlines()))) standardMsg = self._truncateMessage(standardMsg, diff) @@ -1216,7 +1245,7 @@ def assertMultiLineEqual(self, first, second, msg=None): firstlines = [first + '\n'] secondlines = [second + '\n'] standardMsg = '%s != %s' % _common_shorten_repr(first, second) - diff = '\n' + ''.join(difflib.ndiff(firstlines, secondlines)) + diff = '\n' + ''.join(_heuristic_diff(firstlines, secondlines)) standardMsg = 
self._truncateMessage(standardMsg, diff) self.fail(self._formatMessage(msg, standardMsg)) diff --git a/Lib/unittest/test/test_assertions.py b/Lib/unittest/test/test_assertions.py index 14a8f231e1c023..f5e64d68e7b101 100644 --- a/Lib/unittest/test/test_assertions.py +++ b/Lib/unittest/test/test_assertions.py @@ -242,8 +242,8 @@ def testAssertSequenceEqual(self): # Error messages are multiline so not testing on full message # assertTupleEqual and assertListEqual delegate to this method self.assertMessages('assertSequenceEqual', ([], [None]), - [r"\+\[None\]$", "^oops$", r"\+\[None\]$", - r"\+\[None\] : oops$"]) + [r"\+ \[None\]$", "^oops$", r"\+ \[None\]$", + r"\+ \[None\] : oops$"]) def testAssertSetEqual(self): self.assertMessages('assertSetEqual', (set(), set([None])), diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index 631b7150b5b589..5b14c01403c8ce 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -5,6 +5,7 @@ import re import sys import logging +from typing import Tuple import warnings import weakref import inspect @@ -55,6 +56,126 @@ def tearDown(self): self.events.append('tearDown') +class Test_HeuristicDiff(unittest.TestCase): + + N = 50_000 + + def check(self, a, b, expect: Tuple[str, ...]): + """That _heuristic_diff(a, b) == expect""" + + # calls to _heuristic_diff are annotated with "# type: ignore" + # throughout. It is not a member of unittest.case.__all__, so the + # comment silences complaining type checkers. + + diff_iterable = unittest.case._heuristic_diff(a, b) # type: ignore + diff = tuple(diff_iterable) + + # just check equality because if this is broken, the diff message from + # assertEqual is probably useless! + self.assertTrue(diff == expect) + + + ########################################################################### + + # @ambv: I feel that the tests between the lines here are redundant, given + # the other tests below. Removing them shaves ~1s of runtime off. 
+ # what do you think? + + def test_ndiff_is_used_with_small_inputs(self): + a = ('foo',) + b = ('bar',) + expect = ('- foo', '+ bar') + self.check(a, b, expect) + + def test_unified_diff_is_used_with_large_inputs(self): + """One long line, as well as many single-character lines.""" + + # one long line + a = ('foo' * self.N,) + b = ('bar' * self.N,) + expect = ('--- expected\n', '+++ got\n', '@@ -1 +1 @@\n', + '-' + 'foo' * self.N, + '+' + 'bar' * self.N) + self.check(a, b, expect) + + # many lines + a = ('1\n' * self.N).splitlines() + b = ('2\n' * self.N).splitlines() + expect = ('--- expected\n', '+++ got\n', f'@@ -1,{self.N} +1,{self.N} @@\n', + *(['-1'] * self.N), + *(['+2'] * self.N)) + self.check(a, b, expect) + + ########################################################################### + + def test_ndiff_to_unified_diff_breaking_point_long_line(self): + """This is the approximate single line length at which the heuristic + will switch from ndiff to unified_diff.""" + expect_switch_at = 125_000 + a = '' + b = '' + n = expect_switch_at // 2 + while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore + n *= 1.3 + a = ('a' * int(n),) + b = ('b' * int(n),) + + self.assertGreater(n, expect_switch_at * 0.8) + self.assertLess(n, expect_switch_at * 1.1) + + def test_ndiff_to_unified_diff_breaking_point_many_lines(self): + """For lines just one character long, the heuristic will switch from + ndiff to unified_diff around 70,000 differing lines.""" + expect_switch_at = 70_000 + a = '' + b = '' + n = expect_switch_at // 2 + while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore + n *= 1.3 + a = ('a\n' * int(n),) + b = ('b\n' * int(n),) + + self.assertGreater(n, expect_switch_at * 0.8) + self.assertLess(n, expect_switch_at * 1.1) + + def test_ndiff_to_unified_diff_breaking_point_varied_inputs(self): + """Specify how the heuristic behaves with varied inputs and edge cases.""" + # @ambv: I just have this one scaffoleded out in 
comments. I'd like to + # know your thoughts in general before I keep going. + + # scale line length and width at different rates to see what happens + + # for n in range(??): + # line_length = n + # line_width = n * 2 + # ... test heuristic + + # line_length = n * 2 + # line_width = n + # ... test heuristic + + # line_length = n * 3 + # line_width = n + # ... test heuristic + + # line_length = n + # line_width = n * 3 + # ... test heuristic + + def test_ndiff_is_always_used_for_similar_sequences(self): + """ndiff is perfectly efficient at showing small diffs. As long as + the difference between `a` and `b` are small, the size of `a` and `b` + should not disqualify the use of ndiff.""" + a = ('foo ' * 5 + '\n') * 10_000 + b = ('foo ' * 5 + '\n') * 9_999 + ('bar ' * 5 + '\n') + + diff = ''.join(unittest.case._heuristic_diff(a, b)) # type: ignore + + # checking for '@' is an easy way to see if unified_diff was used, + # because it always has the "@@ ... @@" line. + self.assertNotIn('@', diff) + + class Test_TestCase(unittest.TestCase, TestEquality, TestHashing): ### Set up attributes used by inherited tests @@ -800,10 +921,8 @@ def testAssertSequenceEqualMaxDiff(self): self.assertEqual(self.maxDiff, 80*8) seq1 = 'a' + 'x' * 80**2 seq2 = 'b' + 'x' * 80**2 - diff = difflib.unified_diff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines(), - fromfile='expected', tofile='got', - lineterm='') + diff = unittest.case._heuristic_diff(pprint.pformat(seq1).splitlines(), # type: ignore + pprint.pformat(seq2).splitlines()) diff = '\n'.join(diff) # the +1 is the leading \n added by assertSequenceEqual omitted = unittest.case.DIFF_OMITTED % (len(diff) + 1,) @@ -839,36 +958,6 @@ def testAssertSequenceEqualMaxDiff(self): self.assertGreater(len(msg), len(diff)) self.assertNotIn(omitted, msg) - def testAssertEqualModeratelyLongSequencePerformance(self): - """Before fixing bpo-19217, assertTrue on different sequences of the - same length would never finish in 
circumstances like the ones below.""" - - TIME_LIMIT = 1 - - # test comparison of strings - arr1 = 'a' * 10000 - arr2 = 'a' * 9999 + 'b' - start = time.time() - try: - self.assertEqual(arr1, arr2) - except self.failureException: - pass - finish = time.time() - if finish - start > TIME_LIMIT: - self.fail('String comparison took longer than one second') - - # test comparison of lists - arr1 = [1] * 10000 - arr2 = ([1] * 9999) + [2] - start = time.time() - try: - self.assertEqual(arr1, arr2) - except self.failureException: - pass - finish = time.time() - if finish - start > TIME_LIMIT: - self.fail('List comparison took longer than one second') - def testTruncateMessage(self): self.maxDiff = 1 message = self._truncateMessage('foo', 'bar') From 1dd1bcd89a1703a92298a7987f711812aa1fff79 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Tue, 3 Aug 2021 23:41:53 -0400 Subject: [PATCH 09/27] fix: remove now-unused imports from test_case.py --- Lib/unittest/test/test_case.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index 5b14c01403c8ce..d4ef4d2778b52a 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -1,5 +1,4 @@ import contextlib -import difflib import pprint import pickle import re @@ -9,7 +8,6 @@ import warnings import weakref import inspect -import time import types from copy import deepcopy From 988740aed0c6ebf86594e2391354275487ec9b24 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 12 Aug 2021 17:19:00 -0400 Subject: [PATCH 10/27] add variably scaled test cases, misc updates & revisions --- Lib/unittest/case.py | 5 +- Lib/unittest/test/test_case.py | 213 +++++++++++++++++++++++++-------- 2 files changed, 166 insertions(+), 52 deletions(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 884aa836d0e60a..9be056d1adc6ca 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -163,11 +163,12 @@ def _heuristic_diff(a: list[str], b: 
list[str]) -> Iterable[str]: cost = (number of differing lines * total length of all differing lines) """ + # bpo-19217: speed up assertEqual on long sequences - # @ambv: I just deduced this number from guess and check.... is there a - # better way? cost_limit = 1_000_000 + # unified diff is always cheap, so we can use it to measure the magnitude + # of the differences, which is a proxy for the cost of difflib.ndiff udiff = [l for l in difflib.unified_diff(a, b, fromfile='expected', tofile='got')] diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index d4ef4d2778b52a..d1875ed22a26f2 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -1,14 +1,16 @@ import contextlib +from dataclasses import dataclass +import difflib import pprint import pickle import re import sys import logging -from typing import Tuple import warnings import weakref import inspect import types +from typing import Iterator from copy import deepcopy from test import support @@ -56,45 +58,45 @@ def tearDown(self): class Test_HeuristicDiff(unittest.TestCase): + # this large contant coerces the use of `unified_diff` for several tests N = 50_000 - def check(self, a, b, expect: Tuple[str, ...]): - """That _heuristic_diff(a, b) == expect""" - - # calls to _heuristic_diff are annotated with "# type: ignore" - # throughout. It is not a member of unittest.case.__all__, so the - # comment silences complaining type checkers. - + @staticmethod + def is_unified_diff(diff: Iterator[str]) -> bool: + """Check for the presence of the @@ ... 
@@ diff summary line.""" + diffstr = ''.join(diff) + p = r'@@ -(\d(,)?(\d)?)+ \+(\d(,)?(\d)?)+ @@' + mo = re.search(p, diffstr) + return bool(mo) + + def test_is_unified_diff(self): + """Test the helper above""" + ud = difflib.unified_diff('foo', 'bar') + nd = difflib.ndiff('foo', 'bar') + self.assertTrue(self.is_unified_diff(ud)) + self.assertFalse(self.is_unified_diff(nd)) + + def assertHeuristicDiffReturns(self, a, b, expect: tuple[str, ...]): + """check that _heuristic_diff(a, b) == expect""" diff_iterable = unittest.case._heuristic_diff(a, b) # type: ignore diff = tuple(diff_iterable) - - # just check equality because if this is broken, the diff message from - # assertEqual is probably useless! self.assertTrue(diff == expect) - - ########################################################################### - - # @ambv: I feel that the tests between the lines here are redundant, given - # the other tests below. Removing them shaves ~1s of runtime off. - # what do you think? - def test_ndiff_is_used_with_small_inputs(self): a = ('foo',) b = ('bar',) expect = ('- foo', '+ bar') - self.check(a, b, expect) + self.assertHeuristicDiffReturns(a, b, expect) def test_unified_diff_is_used_with_large_inputs(self): """One long line, as well as many single-character lines.""" - # one long line a = ('foo' * self.N,) b = ('bar' * self.N,) expect = ('--- expected\n', '+++ got\n', '@@ -1 +1 @@\n', '-' + 'foo' * self.N, '+' + 'bar' * self.N) - self.check(a, b, expect) + self.assertHeuristicDiffReturns(a, b, expect) # many lines a = ('1\n' * self.N).splitlines() @@ -102,9 +104,7 @@ def test_unified_diff_is_used_with_large_inputs(self): expect = ('--- expected\n', '+++ got\n', f'@@ -1,{self.N} +1,{self.N} @@\n', *(['-1'] * self.N), *(['+2'] * self.N)) - self.check(a, b, expect) - - ########################################################################### + self.assertHeuristicDiffReturns(a, b, expect) def test_ndiff_to_unified_diff_breaking_point_long_line(self): """This is the 
approximate single line length at which the heuristic @@ -136,29 +136,143 @@ def test_ndiff_to_unified_diff_breaking_point_many_lines(self): self.assertGreater(n, expect_switch_at * 0.8) self.assertLess(n, expect_switch_at * 1.1) - def test_ndiff_to_unified_diff_breaking_point_varied_inputs(self): - """Specify how the heuristic behaves with varied inputs and edge cases.""" - # @ambv: I just have this one scaffoleded out in comments. I'd like to - # know your thoughts in general before I keep going. - - # scale line length and width at different rates to see what happens - - # for n in range(??): - # line_length = n - # line_width = n * 2 - # ... test heuristic - - # line_length = n * 2 - # line_width = n - # ... test heuristic + def test_ndiff_to_unified_diff_scaled_line_and_cols(self): + """Scale line length and number of differing columns at different + rates, expecting a switch to `unified_diff` at specified points. + """ - # line_length = n * 3 - # line_width = n - # ... test heuristic + @dataclass + class Case: + """Case class specifies parameters for all tests.""" + line_length_factor: int + num_lines_factor: int + extent_differing: float + + # this is a "magic number" for all cases where the heuristic + # will switch from using ndiff to unified_diff. 
+ expect_unified_diff_at: int + + # --- Layout test cases + # --------------------- + + cases = ( + # scale width and length by ratios of 2:1 + Case( + line_length_factor=1, + num_lines_factor=2, + extent_differing=1, + expect_unified_diff_at = 22, + ), + Case( + line_length_factor=2, + num_lines_factor=1, + extent_differing=1, + expect_unified_diff_at = 28, + ), + + # scale width and length by ratios of 3:1 + Case( + line_length_factor=1, + num_lines_factor=3, + extent_differing=1, + expect_unified_diff_at = 16, + ), + Case( + line_length_factor=3, + num_lines_factor=1, + extent_differing=1, + expect_unified_diff_at = 24, + ), + + # # scale by ratios of 3:1, with only 40% differing + Case( + line_length_factor=3, + num_lines_factor=1, + extent_differing=1, + expect_unified_diff_at = 24, + ), + Case( + line_length_factor=1, + num_lines_factor=3, + extent_differing=0.4, + expect_unified_diff_at = 16, + ), + Case( + line_length_factor=3, + num_lines_factor=1, + extent_differing=0.4, + expect_unified_diff_at = 23, + ), + ) - # line_length = n - # line_width = n * 3 - # ... test heuristic + # --- Execute test cases + # ---------------------- + + def run_case(case: Case, N): + """Given one of the test cases above, execute the test case for a + given `N` constant value. Check if the test has passed as + specified.""" + + # we are working out way towards _heuristic_diff(foo, bar) + + # --- Construct Differing Strings --- + + # construct foo. 
Double line count because bar will have twice + # as many lines (lines of 'a' and lines of 'b') + foo = ( + ('a' * N * (case.line_length_factor),) # create line + * (N * case.num_lines_factor * 2) # duplicate line + ) + + # construct bar + bar_a_line = ('a' * (N * case.line_length_factor)) + bar_b_line = ('b' * (N * case.line_length_factor)) + bar_a_lines = ((bar_a_line,) * (N * case.num_lines_factor)) + if case.extent_differing != 1: + # diminish the amount of 'b' by case.extent_differing, and add + # additional 'a' at the end as padding + bar_b_lines = ((bar_b_line,) + * int(N + * case.num_lines_factor + * case.extent_differing)) + bar_a_padding = ((bar_a_line,) + * int(N + * case.num_lines_factor + * (1 - case.extent_differing))) + bar = tuple((*bar_a_lines, *bar_b_lines, *bar_a_padding)) + else: + bar_b_lines = ((bar_b_line,) * int(N * case.num_lines_factor)) + bar = tuple((*bar_a_lines, *bar_b_lines)) + + # --- Perform Diff --- + + # after all that, we have `foo` and `bar`; two string sequences + # with differences as specified by the Case parameters, scaled + # by a factor of `N`. + diff = unittest.case._heuristic_diff(foo, bar) # type: ignore + + # --- Make Assertions --- + + # now, check that the `case.expect_unified_diff_at` condition was + # met + if ( + N < case.expect_unified_diff_at + and self.is_unified_diff(diff) + ): + self.fail('Switched to `unified_diff` prematurely. Expected ' + f'switch at {case.expect_unified_diff_at}, but ' + f'actually switched at {N} for the case {case}') + elif ( + N > case.expect_unified_diff_at + and not self.is_unified_diff(diff) + ): + self.fail('Switch to `unified_diff` did not occur. Expected ' + 'switch at {case.expect_unified_diff_at}, but no ' + f'switch occured when N == {N}') + + for case in cases: + for N in range(10, case.expect_unified_diff_at + 1): + run_case(case, N) def test_ndiff_is_always_used_for_similar_sequences(self): """ndiff is perfectly efficient at showing small diffs. 
As long as @@ -167,11 +281,8 @@ def test_ndiff_is_always_used_for_similar_sequences(self): a = ('foo ' * 5 + '\n') * 10_000 b = ('foo ' * 5 + '\n') * 9_999 + ('bar ' * 5 + '\n') - diff = ''.join(unittest.case._heuristic_diff(a, b)) # type: ignore - - # checking for '@' is an easy way to see if unified_diff was used, - # because it always has the "@@ ... @@" line. - self.assertNotIn('@', diff) + diff = unittest.case._heuristic_diff(a, b) # type: ignore + self.assertFalse(self.is_unified_diff(diff)) class Test_TestCase(unittest.TestCase, TestEquality, TestHashing): @@ -2071,5 +2182,7 @@ def test2(self): self.assertEqual(MyException.ninstance, 0) + + if __name__ == "__main__": unittest.main() From 4661f750036b417d80505dc42b8e3bc2393e35b0 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 12 Aug 2021 17:20:20 -0400 Subject: [PATCH 11/27] move Test_HeuristicDiff beneath main tests --- Lib/unittest/test/test_case.py | 456 ++++++++++++++++----------------- 1 file changed, 227 insertions(+), 229 deletions(-) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index d1875ed22a26f2..e393274416e7c8 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -56,235 +56,6 @@ def tearDown(self): self.events.append('tearDown') -class Test_HeuristicDiff(unittest.TestCase): - - # this large contant coerces the use of `unified_diff` for several tests - N = 50_000 - - @staticmethod - def is_unified_diff(diff: Iterator[str]) -> bool: - """Check for the presence of the @@ ... 
@@ diff summary line.""" - diffstr = ''.join(diff) - p = r'@@ -(\d(,)?(\d)?)+ \+(\d(,)?(\d)?)+ @@' - mo = re.search(p, diffstr) - return bool(mo) - - def test_is_unified_diff(self): - """Test the helper above""" - ud = difflib.unified_diff('foo', 'bar') - nd = difflib.ndiff('foo', 'bar') - self.assertTrue(self.is_unified_diff(ud)) - self.assertFalse(self.is_unified_diff(nd)) - - def assertHeuristicDiffReturns(self, a, b, expect: tuple[str, ...]): - """check that _heuristic_diff(a, b) == expect""" - diff_iterable = unittest.case._heuristic_diff(a, b) # type: ignore - diff = tuple(diff_iterable) - self.assertTrue(diff == expect) - - def test_ndiff_is_used_with_small_inputs(self): - a = ('foo',) - b = ('bar',) - expect = ('- foo', '+ bar') - self.assertHeuristicDiffReturns(a, b, expect) - - def test_unified_diff_is_used_with_large_inputs(self): - """One long line, as well as many single-character lines.""" - # one long line - a = ('foo' * self.N,) - b = ('bar' * self.N,) - expect = ('--- expected\n', '+++ got\n', '@@ -1 +1 @@\n', - '-' + 'foo' * self.N, - '+' + 'bar' * self.N) - self.assertHeuristicDiffReturns(a, b, expect) - - # many lines - a = ('1\n' * self.N).splitlines() - b = ('2\n' * self.N).splitlines() - expect = ('--- expected\n', '+++ got\n', f'@@ -1,{self.N} +1,{self.N} @@\n', - *(['-1'] * self.N), - *(['+2'] * self.N)) - self.assertHeuristicDiffReturns(a, b, expect) - - def test_ndiff_to_unified_diff_breaking_point_long_line(self): - """This is the approximate single line length at which the heuristic - will switch from ndiff to unified_diff.""" - expect_switch_at = 125_000 - a = '' - b = '' - n = expect_switch_at // 2 - while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore - n *= 1.3 - a = ('a' * int(n),) - b = ('b' * int(n),) - - self.assertGreater(n, expect_switch_at * 0.8) - self.assertLess(n, expect_switch_at * 1.1) - - def test_ndiff_to_unified_diff_breaking_point_many_lines(self): - """For lines just one character long, the 
heuristic will switch from - ndiff to unified_diff around 70,000 differing lines.""" - expect_switch_at = 70_000 - a = '' - b = '' - n = expect_switch_at // 2 - while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore - n *= 1.3 - a = ('a\n' * int(n),) - b = ('b\n' * int(n),) - - self.assertGreater(n, expect_switch_at * 0.8) - self.assertLess(n, expect_switch_at * 1.1) - - def test_ndiff_to_unified_diff_scaled_line_and_cols(self): - """Scale line length and number of differing columns at different - rates, expecting a switch to `unified_diff` at specified points. - """ - - @dataclass - class Case: - """Case class specifies parameters for all tests.""" - line_length_factor: int - num_lines_factor: int - extent_differing: float - - # this is a "magic number" for all cases where the heuristic - # will switch from using ndiff to unified_diff. - expect_unified_diff_at: int - - # --- Layout test cases - # --------------------- - - cases = ( - # scale width and length by ratios of 2:1 - Case( - line_length_factor=1, - num_lines_factor=2, - extent_differing=1, - expect_unified_diff_at = 22, - ), - Case( - line_length_factor=2, - num_lines_factor=1, - extent_differing=1, - expect_unified_diff_at = 28, - ), - - # scale width and length by ratios of 3:1 - Case( - line_length_factor=1, - num_lines_factor=3, - extent_differing=1, - expect_unified_diff_at = 16, - ), - Case( - line_length_factor=3, - num_lines_factor=1, - extent_differing=1, - expect_unified_diff_at = 24, - ), - - # # scale by ratios of 3:1, with only 40% differing - Case( - line_length_factor=3, - num_lines_factor=1, - extent_differing=1, - expect_unified_diff_at = 24, - ), - Case( - line_length_factor=1, - num_lines_factor=3, - extent_differing=0.4, - expect_unified_diff_at = 16, - ), - Case( - line_length_factor=3, - num_lines_factor=1, - extent_differing=0.4, - expect_unified_diff_at = 23, - ), - ) - - # --- Execute test cases - # ---------------------- - - def run_case(case: Case, N): - 
"""Given one of the test cases above, execute the test case for a - given `N` constant value. Check if the test has passed as - specified.""" - - # we are working out way towards _heuristic_diff(foo, bar) - - # --- Construct Differing Strings --- - - # construct foo. Double line count because bar will have twice - # as many lines (lines of 'a' and lines of 'b') - foo = ( - ('a' * N * (case.line_length_factor),) # create line - * (N * case.num_lines_factor * 2) # duplicate line - ) - - # construct bar - bar_a_line = ('a' * (N * case.line_length_factor)) - bar_b_line = ('b' * (N * case.line_length_factor)) - bar_a_lines = ((bar_a_line,) * (N * case.num_lines_factor)) - if case.extent_differing != 1: - # diminish the amount of 'b' by case.extent_differing, and add - # additional 'a' at the end as padding - bar_b_lines = ((bar_b_line,) - * int(N - * case.num_lines_factor - * case.extent_differing)) - bar_a_padding = ((bar_a_line,) - * int(N - * case.num_lines_factor - * (1 - case.extent_differing))) - bar = tuple((*bar_a_lines, *bar_b_lines, *bar_a_padding)) - else: - bar_b_lines = ((bar_b_line,) * int(N * case.num_lines_factor)) - bar = tuple((*bar_a_lines, *bar_b_lines)) - - # --- Perform Diff --- - - # after all that, we have `foo` and `bar`; two string sequences - # with differences as specified by the Case parameters, scaled - # by a factor of `N`. - diff = unittest.case._heuristic_diff(foo, bar) # type: ignore - - # --- Make Assertions --- - - # now, check that the `case.expect_unified_diff_at` condition was - # met - if ( - N < case.expect_unified_diff_at - and self.is_unified_diff(diff) - ): - self.fail('Switched to `unified_diff` prematurely. Expected ' - f'switch at {case.expect_unified_diff_at}, but ' - f'actually switched at {N} for the case {case}') - elif ( - N > case.expect_unified_diff_at - and not self.is_unified_diff(diff) - ): - self.fail('Switch to `unified_diff` did not occur. 
Expected ' - 'switch at {case.expect_unified_diff_at}, but no ' - f'switch occured when N == {N}') - - for case in cases: - for N in range(10, case.expect_unified_diff_at + 1): - run_case(case, N) - - def test_ndiff_is_always_used_for_similar_sequences(self): - """ndiff is perfectly efficient at showing small diffs. As long as - the difference between `a` and `b` are small, the size of `a` and `b` - should not disqualify the use of ndiff.""" - a = ('foo ' * 5 + '\n') * 10_000 - b = ('foo ' * 5 + '\n') * 9_999 + ('bar ' * 5 + '\n') - - diff = unittest.case._heuristic_diff(a, b) # type: ignore - self.assertFalse(self.is_unified_diff(diff)) - - class Test_TestCase(unittest.TestCase, TestEquality, TestHashing): ### Set up attributes used by inherited tests @@ -2182,6 +1953,233 @@ def test2(self): self.assertEqual(MyException.ninstance, 0) +class Test_HeuristicDiff(unittest.TestCase): + + # this large contant coerces the use of `unified_diff` for several tests + N = 50_000 + + @staticmethod + def is_unified_diff(diff: Iterator[str]) -> bool: + """Check for the presence of the @@ ... 
@@ diff summary line.""" + diffstr = ''.join(diff) + p = r'@@ -(\d(,)?(\d)?)+ \+(\d(,)?(\d)?)+ @@' + mo = re.search(p, diffstr) + return bool(mo) + + def test_is_unified_diff(self): + """Test the helper above""" + ud = difflib.unified_diff('foo', 'bar') + nd = difflib.ndiff('foo', 'bar') + self.assertTrue(self.is_unified_diff(ud)) + self.assertFalse(self.is_unified_diff(nd)) + + def assertHeuristicDiffReturns(self, a, b, expect: tuple[str, ...]): + """check that _heuristic_diff(a, b) == expect""" + diff_iterable = unittest.case._heuristic_diff(a, b) # type: ignore + diff = tuple(diff_iterable) + self.assertTrue(diff == expect) + + def test_ndiff_is_used_with_small_inputs(self): + a = ('foo',) + b = ('bar',) + expect = ('- foo', '+ bar') + self.assertHeuristicDiffReturns(a, b, expect) + + def test_unified_diff_is_used_with_large_inputs(self): + """One long line, as well as many single-character lines.""" + # one long line + a = ('foo' * self.N,) + b = ('bar' * self.N,) + expect = ('--- expected\n', '+++ got\n', '@@ -1 +1 @@\n', + '-' + 'foo' * self.N, + '+' + 'bar' * self.N) + self.assertHeuristicDiffReturns(a, b, expect) + + # many lines + a = ('1\n' * self.N).splitlines() + b = ('2\n' * self.N).splitlines() + expect = ('--- expected\n', '+++ got\n', f'@@ -1,{self.N} +1,{self.N} @@\n', + *(['-1'] * self.N), + *(['+2'] * self.N)) + self.assertHeuristicDiffReturns(a, b, expect) + + def test_ndiff_to_unified_diff_breaking_point_long_line(self): + """This is the approximate single line length at which the heuristic + will switch from ndiff to unified_diff.""" + expect_switch_at = 125_000 + a = '' + b = '' + n = expect_switch_at // 2 + while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore + n *= 1.3 + a = ('a' * int(n),) + b = ('b' * int(n),) + + self.assertGreater(n, expect_switch_at * 0.8) + self.assertLess(n, expect_switch_at * 1.1) + + def test_ndiff_to_unified_diff_breaking_point_many_lines(self): + """For lines just one character long, the 
heuristic will switch from + ndiff to unified_diff around 70,000 differing lines.""" + expect_switch_at = 70_000 + a = '' + b = '' + n = expect_switch_at // 2 + while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore + n *= 1.3 + a = ('a\n' * int(n),) + b = ('b\n' * int(n),) + + self.assertGreater(n, expect_switch_at * 0.8) + self.assertLess(n, expect_switch_at * 1.1) + + def test_ndiff_to_unified_diff_scaled_line_and_cols(self): + """Scale line length and number of differing columns at different + rates, expecting a switch to `unified_diff` at specified points. + """ + + @dataclass + class Case: + """Case class specifies parameters for all tests.""" + line_length_factor: int + num_lines_factor: int + extent_differing: float + + # this is a "magic number" for all cases where the heuristic + # will switch from using ndiff to unified_diff. + expect_unified_diff_at: int + + # --- Layout test cases + # --------------------- + + cases = ( + # scale width and length by ratios of 2:1 + Case( + line_length_factor=1, + num_lines_factor=2, + extent_differing=1, + expect_unified_diff_at = 22, + ), + Case( + line_length_factor=2, + num_lines_factor=1, + extent_differing=1, + expect_unified_diff_at = 28, + ), + + # scale width and length by ratios of 3:1 + Case( + line_length_factor=1, + num_lines_factor=3, + extent_differing=1, + expect_unified_diff_at = 16, + ), + Case( + line_length_factor=3, + num_lines_factor=1, + extent_differing=1, + expect_unified_diff_at = 24, + ), + + # # scale by ratios of 3:1, with only 40% differing + Case( + line_length_factor=3, + num_lines_factor=1, + extent_differing=1, + expect_unified_diff_at = 24, + ), + Case( + line_length_factor=1, + num_lines_factor=3, + extent_differing=0.4, + expect_unified_diff_at = 16, + ), + Case( + line_length_factor=3, + num_lines_factor=1, + extent_differing=0.4, + expect_unified_diff_at = 23, + ), + ) + + # --- Execute test cases + # ---------------------- + + def run_case(case: Case, N): + 
"""Given one of the test cases above, execute the test case for a + given `N` constant value. Check if the test has passed as + specified.""" + + # we are working out way towards _heuristic_diff(foo, bar) + + # --- Construct Differing Strings --- + + # construct foo. Double line count because bar will have twice + # as many lines (lines of 'a' and lines of 'b') + foo = ( + ('a' * N * (case.line_length_factor),) # create line + * (N * case.num_lines_factor * 2) # duplicate line + ) + + # construct bar + bar_a_line = ('a' * (N * case.line_length_factor)) + bar_b_line = ('b' * (N * case.line_length_factor)) + bar_a_lines = ((bar_a_line,) * (N * case.num_lines_factor)) + if case.extent_differing != 1: + # diminish the amount of 'b' by case.extent_differing, and add + # additional 'a' at the end as padding + bar_b_lines = ((bar_b_line,) + * int(N + * case.num_lines_factor + * case.extent_differing)) + bar_a_padding = ((bar_a_line,) + * int(N + * case.num_lines_factor + * (1 - case.extent_differing))) + bar = tuple((*bar_a_lines, *bar_b_lines, *bar_a_padding)) + else: + bar_b_lines = ((bar_b_line,) * int(N * case.num_lines_factor)) + bar = tuple((*bar_a_lines, *bar_b_lines)) + + # --- Perform Diff --- + + # after all that, we have `foo` and `bar`; two string sequences + # with differences as specified by the Case parameters, scaled + # by a factor of `N`. + diff = unittest.case._heuristic_diff(foo, bar) # type: ignore + + # --- Make Assertions --- + + # now, check that the `case.expect_unified_diff_at` condition was + # met + if ( + N < case.expect_unified_diff_at + and self.is_unified_diff(diff) + ): + self.fail('Switched to `unified_diff` prematurely. Expected ' + f'switch at {case.expect_unified_diff_at}, but ' + f'actually switched at {N} for the case {case}') + elif ( + N > case.expect_unified_diff_at + and not self.is_unified_diff(diff) + ): + self.fail('Switch to `unified_diff` did not occur. 
Expected ' + 'switch at {case.expect_unified_diff_at}, but no ' + f'switch occured when N == {N}') + + for case in cases: + for N in range(10, case.expect_unified_diff_at + 1): + run_case(case, N) + + def test_ndiff_is_always_used_for_similar_sequences(self): + """ndiff is perfectly efficient at showing small diffs. As long as + the difference between `a` and `b` are small, the size of `a` and `b` + should not disqualify the use of ndiff.""" + a = ('foo ' * 5 + '\n') * 10_000 + b = ('foo ' * 5 + '\n') * 9_999 + ('bar ' * 5 + '\n') + + diff = unittest.case._heuristic_diff(a, b) # type: ignore + self.assertFalse(self.is_unified_diff(diff)) if __name__ == "__main__": From a9d23c44cd1ec0f15921d675448da9f6d93d49f6 Mon Sep 17 00:00:00 2001 From: Jack DeVries <58614260+jdevries3133@users.noreply.github.com> Date: Fri, 13 Aug 2021 09:16:35 -0400 Subject: [PATCH 12/27] remove unnecessary list comprehension in Lib/unittest/case.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/unittest/case.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 9be056d1adc6ca..96320ae5e28811 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -169,9 +169,7 @@ def _heuristic_diff(a: list[str], b: list[str]) -> Iterable[str]: # unified diff is always cheap, so we can use it to measure the magnitude # of the differences, which is a proxy for the cost of difflib.ndiff - udiff = [l for l in difflib.unified_diff(a, b, - fromfile='expected', - tofile='got')] + udiff = list(difflib.unified_diff(a, b, fromfile="expected", tofile="got")) udiff_differing_lines = [l for l in udiff if l.startswith('-') or l.startswith('+')] num_difflines = len(udiff_differing_lines) From 192d7a4941b547439d62fd14b623bed482428100 Mon Sep 17 00:00:00 2001 From: Jack DeVries <58614260+jdevries3133@users.noreply.github.com> Date: Fri, 13 Aug 2021 09:16:49 -0400 Subject: 
[PATCH 13/27] spelling error in Lib/unittest/test/test_case.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/unittest/test/test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index e393274416e7c8..12afe7717e405d 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -1955,7 +1955,7 @@ def test2(self): class Test_HeuristicDiff(unittest.TestCase): - # this large contant coerces the use of `unified_diff` for several tests + # this large constant coerces the use of `unified_diff` for several tests N = 50_000 @staticmethod From 74895e90cb6efad984f9046255be19a0898f5fed Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Fri, 13 Aug 2021 09:17:06 -0400 Subject: [PATCH 14/27] implement second review from @ambv --- Lib/unittest/case.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 9be056d1adc6ca..0c668cc86fe075 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -5,7 +5,7 @@ import difflib import pprint import re -from typing import Iterable +from typing import Iterator import warnings import collections import contextlib @@ -155,16 +155,16 @@ def _is_subtype(expected, basetype): return isinstance(expected, type) and issubclass(expected, basetype) -def _heuristic_diff(a: list[str], b: list[str]) -> Iterable[str]: +def _heuristic_diff(a: list[str], b: list[str]) -> Iterator[str]: """After testing the magnitude of the inputs, preferably return the output of difflib.ndiff, but fallback to difflib.unified_diff for prohibitively expensive inputs. How cost is calclated: cost = (number of differing lines * total length of all differing lines) - """ - # bpo-19217: speed up assertEqual on long sequences + See bpo-19217 for context. 
+ """ cost_limit = 1_000_000 # unified diff is always cheap, so we can use it to measure the magnitude From 28cb04245a6165404a4d1f9e82d0a0776e90358e Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 13:52:30 -0500 Subject: [PATCH 15/27] fix from @JelleZijlstra Co-authored-by: Jelle Zijlstra --- Lib/unittest/case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index dd6ba852cc010d..4eb7ea93ff147e 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -188,7 +188,7 @@ def _heuristic_diff(a: list[str], b: list[str]) -> Iterator[str]: # of the differences, which is a proxy for the cost of difflib.ndiff udiff = list(difflib.unified_diff(a, b, fromfile="expected", tofile="got")) udiff_differing_lines = [l for l in udiff - if l.startswith('-') or l.startswith('+')] + if l.startswith(('-', '+'))] num_difflines = len(udiff_differing_lines) total_diffline_length = sum(len(l) for l in udiff_differing_lines) From 63ecc457e6d7c9dc7a8c526cf69679ea2c6aa357 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 13:53:02 -0500 Subject: [PATCH 16/27] fix from @JelleZijlstra Co-authored-by: Jelle Zijlstra --- Lib/unittest/case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 4eb7ea93ff147e..77704d6bbe12ba 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -5,7 +5,7 @@ import difflib import pprint import re -from typing import Iterator +from collections.abc import Iterator import warnings import collections import contextlib From e4344cbc3110fc33d69959e07756f626ae90a009 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 14:00:23 -0500 Subject: [PATCH 17/27] remove unnecessary type checker supression --- Lib/unittest/test/test_case.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index 
b5d88156b906ea..1c63ebb0967add 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -823,7 +823,7 @@ def testAssertSequenceEqualMaxDiff(self): self.assertEqual(self.maxDiff, 80*8) seq1 = 'a' + 'x' * 80**2 seq2 = 'b' + 'x' * 80**2 - diff = unittest.case._heuristic_diff(pprint.pformat(seq1).splitlines(), # type: ignore + diff = unittest.case._heuristic_diff(pprint.pformat(seq1).splitlines(), pprint.pformat(seq2).splitlines()) diff = '\n'.join(diff) # the +1 is the leading \n added by assertSequenceEqual @@ -1998,7 +1998,7 @@ def test_is_unified_diff(self): def assertHeuristicDiffReturns(self, a, b, expect: tuple[str, ...]): """check that _heuristic_diff(a, b) == expect""" - diff_iterable = unittest.case._heuristic_diff(a, b) # type: ignore + diff_iterable = unittest.case._heuristic_diff(a, b) diff = tuple(diff_iterable) self.assertTrue(diff == expect) @@ -2033,7 +2033,7 @@ def test_ndiff_to_unified_diff_breaking_point_long_line(self): a = '' b = '' n = expect_switch_at // 2 - while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore + while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): n *= 1.3 a = ('a' * int(n),) b = ('b' * int(n),) @@ -2048,7 +2048,7 @@ def test_ndiff_to_unified_diff_breaking_point_many_lines(self): a = '' b = '' n = expect_switch_at // 2 - while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): # type: ignore + while '@' not in ''.join(unittest.case._heuristic_diff(a, b)): n *= 1.3 a = ('a\n' * int(n),) b = ('b\n' * int(n),) @@ -2169,7 +2169,7 @@ def run_case(case: Case, N): # after all that, we have `foo` and `bar`; two string sequences # with differences as specified by the Case parameters, scaled # by a factor of `N`. 
- diff = unittest.case._heuristic_diff(foo, bar) # type: ignore + diff = unittest.case._heuristic_diff(foo, bar) # --- Make Assertions --- @@ -2201,7 +2201,7 @@ def test_ndiff_is_always_used_for_similar_sequences(self): a = ('foo ' * 5 + '\n') * 10_000 b = ('foo ' * 5 + '\n') * 9_999 + ('bar ' * 5 + '\n') - diff = unittest.case._heuristic_diff(a, b) # type: ignore + diff = unittest.case._heuristic_diff(a, b) self.assertFalse(self.is_unified_diff(diff)) From 0bdf06ce5bf56769a05b14511c51345650912dfb Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 14:02:03 -0500 Subject: [PATCH 18/27] fix typo --- Lib/unittest/test/test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index 1c63ebb0967add..b1af535302ee65 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -2133,7 +2133,7 @@ def run_case(case: Case, N): given `N` constant value. Check if the test has passed as specified.""" - # we are working out way towards _heuristic_diff(foo, bar) + # we are working our way towards _heuristic_diff(foo, bar) # --- Construct Differing Strings --- From 05ffdf2366bed1db12aa884eef425ed3dab377b7 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 14:02:18 -0500 Subject: [PATCH 19/27] simplify tuple syntax --- Lib/unittest/test/test_case.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index b1af535302ee65..7103678539a217 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -2159,10 +2159,10 @@ def run_case(case: Case, N): * int(N * case.num_lines_factor * (1 - case.extent_differing))) - bar = tuple((*bar_a_lines, *bar_b_lines, *bar_a_padding)) + bar = (*bar_a_lines, *bar_b_lines, *bar_a_padding) else: bar_b_lines = ((bar_b_line,) * int(N * case.num_lines_factor)) - bar = tuple((*bar_a_lines, *bar_b_lines)) + bar = 
(*bar_a_lines, *bar_b_lines) # --- Perform Diff --- From 5767d217238839096be167f599d2319725c299f7 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 14:02:43 -0500 Subject: [PATCH 20/27] fix news entry "~lists~ => sequenecs" --- .../next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst index 63431453071b8f..2bc060b713670f 100644 --- a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst +++ b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst @@ -1,2 +1,2 @@ -Optimize unittest.TestCase.assertEqual method for long lists of varied +Optimize unittest.TestCase.assertEqual method for long sequences of varied items. From b9f2f9db79d971a2022ad915d42198827aed7b18 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 15:36:15 -0500 Subject: [PATCH 21/27] better document the reasoning behind the heuristic --- Lib/unittest/case.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 77704d6bbe12ba..0a55536e3c3425 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -175,26 +175,37 @@ def _is_subtype(expected, basetype): def _heuristic_diff(a: list[str], b: list[str]) -> Iterator[str]: """After testing the magnitude of the inputs, preferably return the output of difflib.ndiff, but fallback to difflib.unified_diff for prohibitively - expensive inputs. How cost is calclated: + expensive inputs. + + Cost is calculated according to this heuristic: cost = (number of differing lines * total length of all differing lines) - See bpo-19217 for context. 
+ This heuristic is used because the time complexity of ndiff is + approximately O((diff)^2), where `diff` is the product of the number of + differing lines, and the total length of differing lines. On the other + hand, unified_diff's cost is the same as the cost of producing `diff` + by itself: O(a + b). + + See bpo-19217 for additional context. """ - cost_limit = 1_000_000 + COST_LIMIT = 1_000_000 - # unified diff is always cheap, so we can use it to measure the magnitude - # of the differences, which is a proxy for the cost of difflib.ndiff + # call unified_diff udiff = list(difflib.unified_diff(a, b, fromfile="expected", tofile="got")) udiff_differing_lines = [l for l in udiff if l.startswith(('-', '+'))] + + # inspect unified_diff output num_difflines = len(udiff_differing_lines) total_diffline_length = sum(len(l) for l in udiff_differing_lines) + # now, we know what it will cost to call `ndiff`, according to the + # heuristic diff_cost = num_difflines * total_diffline_length - if diff_cost > cost_limit: + if diff_cost > COST_LIMIT: yield from udiff else: yield from difflib.ndiff(a, b) From 5351506d7768316c1a4268b483765d3fff26be65 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 16:22:41 -0500 Subject: [PATCH 22/27] thanks @JelleZijlstra Co-authored-by: Jelle Zijlstra --- Lib/unittest/case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 0a55536e3c3425..d6d31b2c0ea84a 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -175,7 +175,7 @@ def _is_subtype(expected, basetype): def _heuristic_diff(a: list[str], b: list[str]) -> Iterator[str]: """After testing the magnitude of the inputs, preferably return the output of difflib.ndiff, but fallback to difflib.unified_diff for prohibitively - expensive inputs. + expensive inputs. 
How cost is calculated: Cost is calculated according to this heuristic: From 872de086d36b92dc12499f157c1375c9da2ba713 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 16:23:00 -0500 Subject: [PATCH 23/27] thanks @JelleZijlstra Co-authored-by: Jelle Zijlstra --- .../next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst index 2bc060b713670f..32e9d63381fecc 100644 --- a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst +++ b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst @@ -1,2 +1,2 @@ -Optimize unittest.TestCase.assertEqual method for long sequences of varied +Optimize :meth:`~unittest.TestCase.assertEqual` method for long sequences of varied items. From 180c7323dfd794fe3f0e6919e7a02d35ad1e62f2 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sat, 29 Jan 2022 16:26:45 -0500 Subject: [PATCH 24/27] fix whitespace around keyword argument --- Lib/unittest/test/test_case.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/unittest/test/test_case.py b/Lib/unittest/test/test_case.py index 7103678539a217..9b8bf8bc5ad877 100644 --- a/Lib/unittest/test/test_case.py +++ b/Lib/unittest/test/test_case.py @@ -2081,13 +2081,13 @@ class Case: line_length_factor=1, num_lines_factor=2, extent_differing=1, - expect_unified_diff_at = 22, + expect_unified_diff_at=22, ), Case( line_length_factor=2, num_lines_factor=1, extent_differing=1, - expect_unified_diff_at = 28, + expect_unified_diff_at=28, ), # scale width and length by ratios of 3:1 @@ -2095,13 +2095,13 @@ class Case: line_length_factor=1, num_lines_factor=3, extent_differing=1, - expect_unified_diff_at = 16, + expect_unified_diff_at=16, ), Case( line_length_factor=3, num_lines_factor=1, extent_differing=1, - expect_unified_diff_at = 24, + 
expect_unified_diff_at=24, ), # # scale by ratios of 3:1, with only 40% differing @@ -2109,19 +2109,19 @@ class Case: line_length_factor=3, num_lines_factor=1, extent_differing=1, - expect_unified_diff_at = 24, + expect_unified_diff_at=24, ), Case( line_length_factor=1, num_lines_factor=3, extent_differing=0.4, - expect_unified_diff_at = 16, + expect_unified_diff_at=16, ), Case( line_length_factor=3, num_lines_factor=1, extent_differing=0.4, - expect_unified_diff_at = 23, + expect_unified_diff_at=23, ), ) From fc752cf40b752a28781e67b457de0d31f54dee14 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Mon, 7 Feb 2022 13:28:54 -0500 Subject: [PATCH 25/27] update blurb to describe observable changes --- .../Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst index 32e9d63381fecc..d06d89bb7b72a3 100644 --- a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst +++ b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst @@ -1,2 +1,9 @@ -Optimize :meth:`~unittest.TestCase.assertEqual` method for long sequences of varied -items. +Optimize :meth:`~unittest.TestCase.assertEqual` for long sequences of varied +items. Based on an internal heuristic, the algorithm used to produce the diff +method will switch based on the relevant magnitude of inputs. As a result, diff +output after a failing assertion may appear differently for large inputs. +Specifically, unittest will internally switch from using :func:`difflib.ndiff` +(slow) to using :func:`difflib.unified_diff` (fast). + +This optimization ensures that the non-linear time complexity of +:func:`difflib.ndiff` does not cause a test suite to hang. From adc7d6f92818b3e64a652ff290d900b1dfae0caf Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Mon, 7 Feb 2022 17:46:17 -0800 Subject: [PATCH 26/27] reword to make less promises unified diff is not "fast" just "less likely slow" or "slow in different circumstances". so this doesn't ensure the problem never happens, it just reduces its chance. --- .../next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst index d06d89bb7b72a3..6814851d4d583b 100644 --- a/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst +++ b/Misc/NEWS.d/next/Library/2021-07-21-22-57-51.bpo-19217.vm-cr-.rst @@ -3,7 +3,7 @@ items. Based on an internal heuristic, the algorithm used to produce the diff method will switch based on the relevant magnitude of inputs. As a result, diff output after a failing assertion may appear differently for large inputs. Specifically, unittest will internally switch from using :func:`difflib.ndiff` -(slow) to using :func:`difflib.unified_diff` (fast). +(slow) to using :func:`difflib.unified_diff` (less likely to be slow). -This optimization ensures that the non-linear time complexity of -:func:`difflib.ndiff` does not cause a test suite to hang. +This optimization reduces the chance that non-linear time complexity of +diff algorithms causes a test suite's failing test to hang.
From 4c5175cc6a71c7e4836e9afbe8fe79db5a5a5c8b Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 9 Apr 2025 23:26:17 +0100 Subject: [PATCH 27/27] Remove annotations --- Lib/test/test_unittest/test_case.py | 7 +++---- Lib/unittest/case.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_unittest/test_case.py b/Lib/test/test_unittest/test_case.py index ae5181ee20966f..e73e90deca54d2 100644 --- a/Lib/test/test_unittest/test_case.py +++ b/Lib/test/test_unittest/test_case.py @@ -10,7 +10,6 @@ import weakref import inspect import types -from typing import Iterator from collections import UserString from copy import deepcopy @@ -2334,7 +2333,7 @@ class Test_HeuristicDiff(unittest.TestCase): N = 50_000 @staticmethod - def is_unified_diff(diff: Iterator[str]) -> bool: + def is_unified_diff(diff): """Check for the presence of the @@ ... @@ diff summary line.""" diffstr = ''.join(diff) p = r'@@ -(\d(,)?(\d)?)+ \+(\d(,)?(\d)?)+ @@' @@ -2348,7 +2347,7 @@ def test_is_unified_diff(self): self.assertTrue(self.is_unified_diff(ud)) self.assertFalse(self.is_unified_diff(nd)) - def assertHeuristicDiffReturns(self, a, b, expect: tuple[str, ...]): + def assertHeuristicDiffReturns(self, a, b, expect): """check that _heuristic_diff(a, b) == expect""" diff_iterable = unittest.case._heuristic_diff(a, b) diff = tuple(diff_iterable) @@ -2480,7 +2479,7 @@ class Case: # --- Execute test cases # ---------------------- - def run_case(case: Case, N): + def run_case(case, N): """Given one of the test cases above, execute the test case for a given `N` constant value. 
Check if the test has passed as specified.""" diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index c119b79fda3a43..45b3b105bdedb8 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -5,7 +5,6 @@ import difflib import pprint import re -from collections.abc import Iterator import warnings import collections import contextlib @@ -192,7 +191,7 @@ def _is_subtype(expected, basetype): return isinstance(expected, type) and issubclass(expected, basetype) -def _heuristic_diff(a: list[str], b: list[str]) -> Iterator[str]: +def _heuristic_diff(a, b): """After testing the magnitude of the inputs, preferably return the output of difflib.ndiff, but fallback to difflib.unified_diff for prohibitively expensive inputs. How cost is calculated: