gh-57141: Add dircmp shallow option. by aunzat · Pull Request #109499 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-57141: Add dircmp shallow option. #109499

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Doc/library/filecmp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,20 @@ The :mod:`filecmp` module defines the following functions:
The :class:`dircmp` class
-------------------------

.. class:: dircmp(a, b, ignore=None, hide=None)
.. class:: dircmp(a, b, ignore=None, hide=None, shallow=True)

Construct a new directory comparison object, to compare the directories *a*
and *b*. *ignore* is a list of names to ignore, and defaults to
:const:`filecmp.DEFAULT_IGNORES`. *hide* is a list of names to hide, and
defaults to ``[os.curdir, os.pardir]``.

The :class:`dircmp` class compares files by doing *shallow* comparisons
as described for :func:`filecmp.cmp`.
as described for :func:`filecmp.cmp` by default using the *shallow*
parameter.

.. versionchanged:: 3.13

Added the *shallow* parameter.

The :class:`dircmp` class provides the following methods:

Expand Down
13 changes: 9 additions & 4 deletions Lib/filecmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,15 @@ def _do_cmp(f1, f2):
class dircmp:
"""A class that manages the comparison of 2 directories.

dircmp(a, b, ignore=None, hide=None)
dircmp(a, b, ignore=None, hide=None, shallow=True)
A and B are directories.
IGNORE is a list of names to ignore,
defaults to DEFAULT_IGNORES.
HIDE is a list of names to hide,
defaults to [os.curdir, os.pardir].
SHALLOW specifies whether to just check the stat signature (do not read
the files).
defaults to True.

High level usage:
x = dircmp(dir1, dir2)
Expand Down Expand Up @@ -121,7 +124,7 @@ class dircmp:
in common_dirs.
"""

def __init__(self, a, b, ignore=None, hide=None): # Initialize
def __init__(self, a, b, ignore=None, hide=None, shallow=True): # Initialize
self.left = a
self.right = b
if hide is None:
Expand All @@ -132,6 +135,7 @@ def __init__(self, a, b, ignore=None, hide=None): # Initialize
self.ignore = DEFAULT_IGNORES
else:
self.ignore = ignore
self.shallow = shallow

def phase0(self): # Compare everything except common subdirectories
self.left_list = _filter(os.listdir(self.left),
Expand Down Expand Up @@ -184,7 +188,7 @@ def phase2(self): # Distinguish files, directories, funnies
self.common_funny.append(x)

def phase3(self): # Find out differences between common files
xx = cmpfiles(self.left, self.right, self.common_files)
xx = cmpfiles(self.left, self.right, self.common_files, self.shallow)
self.same_files, self.diff_files, self.funny_files = xx

def phase4(self): # Find out differences between common subdirectories
Expand All @@ -196,7 +200,8 @@ def phase4(self): # Find out differences between common subdirectories
for x in self.common_dirs:
a_x = os.path.join(self.left, x)
b_x = os.path.join(self.right, x)
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide)
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide,
self.shallow)

def phase4_closure(self): # Recursively call phase4() on subdirectories
self.phase4()
Expand Down
126 changes: 101 additions & 25 deletions Lib/test/test_filecmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,44 @@
from test.support import os_helper


def _create_file_shallow_equal(template_path, new_path):
    """Create a file with the same size and mtime but different content.

    *new_path* becomes a copy of *template_path* whose first byte is
    incremented (modulo 256).  The stat information of the template is then
    copied back onto the new file so that both files have the same size and
    modification time while their contents differ.

    Raises ValueError if the template is empty: a zero-length file has no
    byte to alter, so no same-size file with different content exists.
    """
    # Check before copying so that a failure leaves no stray file behind.
    if os.stat(template_path).st_size == 0:
        raise ValueError(
            f"template file {template_path!r} is empty; cannot create a "
            "same-size file with different content")

    shutil.copy2(template_path, new_path)
    with open(new_path, 'r+b') as f:
        first_byte = f.read(1)
        f.seek(0)
        # Bump the first byte, wrapping 0xff around to 0x00.
        f.write(bytes([(first_byte[0] + 1) % 256]))
    # Writing updated the mtime; restore the template's stat information.
    shutil.copystat(template_path, new_path)

    template_stat = os.stat(template_path)
    new_stat = os.stat(new_path)
    assert new_stat.st_size == template_stat.st_size
    assert new_stat.st_mtime == template_stat.st_mtime

class FileCompareTestCase(unittest.TestCase):
def setUp(self):
# Build the fixture files used by the file comparison tests: a base file,
# a copy with the same content, a file with extra content, and a file that
# only looks the same to a shallow comparison.
self.name = os_helper.TESTFN
self.name_same = os_helper.TESTFN + '-same'
self.name_diff = os_helper.TESTFN + '-diff'
self.name_same_shallow = os_helper.TESTFN + '-same-shallow'
data = 'Contents of file go here.\n'
# Start all three regular fixtures with the same content.
for name in [self.name, self.name_same, self.name_diff]:
with open(name, 'w', encoding="utf-8") as output:
output.write(data)

# Make name_diff differ from the base file by appending a line.
with open(self.name_diff, 'a+', encoding="utf-8") as output:
output.write('An extra line.\n')

# Copy the base file's stat information onto the other two files; this
# must happen after all writes above.
for name in [self.name_same, self.name_diff]:
shutil.copystat(self.name, name)

# name_same_shallow: same size and mtime as the base file, different content.
_create_file_shallow_equal(self.name, self.name_same_shallow)

# A directory path, used to compare a file against a directory.
self.dir = tempfile.gettempdir()

def tearDown(self):
# Remove every file created in setUp.
os.unlink(self.name)
os.unlink(self.name_same)
os.unlink(self.name_diff)
os.unlink(self.name_same_shallow)

def test_matching(self):
self.assertTrue(filecmp.cmp(self.name, self.name),
Expand All @@ -36,12 +56,17 @@ def test_matching(self):
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same, shallow=False),
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same_shallow),
"Shallow identical files should be considered equal")

def test_different(self):
# Files with different content, and a file versus a directory, must not
# compare as equal.
self.assertFalse(filecmp.cmp(self.name, self.name_diff),
"Mismatched files compare as equal")
self.assertFalse(filecmp.cmp(self.name, self.dir),
"File and directory compare as equal")
# With shallow=False, the file that shares size and mtime with the base
# file but has different content must be reported as different.
self.assertFalse(filecmp.cmp(self.name, self.name_same_shallow,
shallow=False),
"Mismatched file to shallow identical file compares as equal")

def test_cache_clear(self):
first_compare = filecmp.cmp(self.name, self.name_same, shallow=False)
Expand All @@ -56,14 +81,26 @@ def setUp(self):
self.dir = os.path.join(tmpdir, 'dir')
self.dir_same = os.path.join(tmpdir, 'dir-same')
self.dir_diff = os.path.join(tmpdir, 'dir-diff')
self.dir_diff_file = os.path.join(tmpdir, 'dir-diff-file')
self.dir_same_shallow = os.path.join(tmpdir, 'dir-same-shallow')

# Another dir is created under dir_same, but it has a name from the
# ignored list so it should not affect testing results.
self.dir_ignored = os.path.join(self.dir_same, '.hg')

self.caseinsensitive = os.path.normcase('A') == os.path.normcase('a')
data = 'Contents of file go here.\n'
for dir in (self.dir, self.dir_same, self.dir_diff, self.dir_ignored):

shutil.rmtree(self.dir, True)
os.mkdir(self.dir)
subdir_path = os.path.join(self.dir, 'subdir')
os.mkdir(subdir_path)
dir_file_path = os.path.join(self.dir, "file")
with open(dir_file_path, 'w', encoding="utf-8") as output:
output.write(data)

for dir in (self.dir_same, self.dir_same_shallow,
self.dir_diff, self.dir_diff_file):
shutil.rmtree(dir, True)
os.mkdir(dir)
subdir_path = os.path.join(dir, 'subdir')
Expand All @@ -72,14 +109,25 @@ def setUp(self):
fn = 'FiLe' # Verify case-insensitive comparison
else:
fn = 'file'
with open(os.path.join(dir, fn), 'w', encoding="utf-8") as output:
output.write(data)

file_path = os.path.join(dir, fn)

if dir is self.dir_same_shallow:
_create_file_shallow_equal(dir_file_path, file_path)
else:
shutil.copy2(dir_file_path, file_path)

with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
output.write('An extra file.\n')

# Add different file2 with respect to dir_diff
with open(os.path.join(self.dir_diff_file, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')


def tearDown(self):
for dir in (self.dir, self.dir_same, self.dir_diff):
for dir in (self.dir, self.dir_same, self.dir_diff,
self.dir_same_shallow, self.dir_diff_file):
shutil.rmtree(dir)

def test_default_ignores(self):
Expand All @@ -102,11 +150,7 @@ def test_cmpfiles(self):
shallow=False),
"Comparing directory to same fails")

# Add different file2
with open(os.path.join(self.dir, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')

self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_same,
self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_diff_file,
['file', 'file2']) ==
(['file'], ['file2'], []),
"Comparing mismatched directories fails")
Expand All @@ -116,11 +160,22 @@ def _assert_lists(self, actual, expected):
"""Assert that two lists are equal, up to ordering."""
self.assertEqual(sorted(actual), sorted(expected))

def test_dircmp_identical_directories(self):
self._assert_dircmp_identical_directories()
self._assert_dircmp_identical_directories(shallow=False)

def test_dircmp(self):
def test_dircmp_different_file(self):
self._assert_dircmp_different_file()
self._assert_dircmp_different_file(shallow=False)

def test_dircmp_different_directories(self):
self._assert_dircmp_different_directories()
self._assert_dircmp_different_directories(shallow=False)

def _assert_dircmp_identical_directories(self, **options):
# Check attributes for comparison of two identical directories
left_dir, right_dir = self.dir, self.dir_same
d = filecmp.dircmp(left_dir, right_dir)
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
if self.caseinsensitive:
Expand All @@ -142,9 +197,10 @@ def test_dircmp(self):
]
self._assert_report(d.report, expected_report)

def _assert_dircmp_different_directories(self, **options):
# Check attributes for comparison of two different directories (right)
left_dir, right_dir = self.dir, self.dir_diff
d = filecmp.dircmp(left_dir, right_dir)
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'subdir'])
Expand All @@ -164,12 +220,8 @@ def test_dircmp(self):
self._assert_report(d.report, expected_report)

# Check attributes for comparison of two different directories (left)
left_dir, right_dir = self.dir, self.dir_diff
shutil.move(
os.path.join(self.dir_diff, 'file2'),
os.path.join(self.dir, 'file2')
)
d = filecmp.dircmp(left_dir, right_dir)
left_dir, right_dir = self.dir_diff, self.dir
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'file2', 'subdir'])
Expand All @@ -180,27 +232,51 @@ def test_dircmp(self):
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_diff),
"Only in {} : ['file2']".format(self.dir),
"diff {} {}".format(self.dir_diff, self.dir),
"Only in {} : ['file2']".format(self.dir_diff),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

# Add different file2
with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')
d = filecmp.dircmp(self.dir, self.dir_diff)

def _assert_dircmp_different_file(self, **options):
# A different file2
d = filecmp.dircmp(self.dir_diff, self.dir_diff_file, **options)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, ['file2'])
expected_report = [
"diff {} {}".format(self.dir, self.dir_diff),
"diff {} {}".format(self.dir_diff, self.dir_diff_file),
"Identical files : ['file']",
"Differing files : ['file2']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

def test_dircmp_no_shallow_different_file(self):
# Non-shallow comparison: 'file' in dir_same_shallow has the same size and
# mtime as the one in dir but different content, so it must be reported
# as differing.
d = filecmp.dircmp(self.dir, self.dir_same_shallow, shallow=False)
self.assertEqual(d.same_files, [])
self.assertEqual(d.diff_files, ['file'])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Differing files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

def test_dircmp_shallow_same_file(self):
# Default (shallow) comparison: 'file' in dir_same_shallow has the same
# size and mtime as the one in dir, so it is reported as identical even
# though its content differs.
d = filecmp.dircmp(self.dir, self.dir_same_shallow)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

def test_dircmp_subdirs_type(self):
"""Check that dircmp.subdirs respects subclassing."""
class MyDirCmp(filecmp.dircmp):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add option for *non-shallow* comparisons to :class:`filecmp.dircmp` like
:func:`filecmp.cmp`. Original patch by Steven Ward. Enhanced by
Tobias Rautenkranz
0