From dc8e774e07f900e67f4e43f593c17f3ae3e9fce8 Mon Sep 17 00:00:00 2001 From: Xavier de Gaye Date: Thu, 25 Apr 2019 16:59:21 +0200 Subject: [PATCH] Add the `duplicate_meth_defs.py` tool as a pre-commit check on Travis --- .../2019-04-20-15-29-04.bpo-16079.KRYhYf.rst | 5 + Tools/scripts/duplicate_meth_defs.py | 282 ++++++++++++++++++ Tools/scripts/duplicates_ignored.txt | 67 +++++ Tools/scripts/patchcheck.py | 56 +++- 4 files changed, 399 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2019-04-20-15-29-04.bpo-16079.KRYhYf.rst create mode 100755 Tools/scripts/duplicate_meth_defs.py create mode 100644 Tools/scripts/duplicates_ignored.txt diff --git a/Misc/NEWS.d/next/Tools-Demos/2019-04-20-15-29-04.bpo-16079.KRYhYf.rst b/Misc/NEWS.d/next/Tools-Demos/2019-04-20-15-29-04.bpo-16079.KRYhYf.rst new file mode 100644 index 00000000000000..428ab78065a04b --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2019-04-20-15-29-04.bpo-16079.KRYhYf.rst @@ -0,0 +1,5 @@ +Add the `duplicate_meth_defs.py` tool as a pre-commit check on Travis. The +`duplicate_meth_defs.py` tool reports duplicate method definitions in a +given list of files or directories. One must add an entry in +`Tools/scripts/duplicates_ignored.txt` to bypass the pre-commit check when +the reported duplicate is a false positive. 
diff --git a/Tools/scripts/duplicate_meth_defs.py b/Tools/scripts/duplicate_meth_defs.py new file mode 100755 index 00000000000000..0f70e83653b187 --- /dev/null +++ b/Tools/scripts/duplicate_meth_defs.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python3 +"""Print duplicate method definitions in the same scope.""" + +import sys +import os +import ast +import re +import argparse +import traceback +from tokenize import open as tokenize_open +from contextlib import contextmanager + +class ClassFunctionVisitor(ast.NodeVisitor): + def __init__(self, duplicates): + self.duplicates = duplicates + self.scopes = [] + + @contextmanager + def _visit_new_scope(self, node): + prev_scope = self.scopes[-1] if len(self.scopes) else None + new_scope = Scope(node, prev_scope) + try: + yield prev_scope, new_scope + finally: + self.scopes.append(new_scope) + self.generic_visit(node) + self.scopes.pop() + + def visit_FunctionDef(self, node): + with self._visit_new_scope(node) as (previous, new): + if previous is not None and previous.type == 'ClassDef': + new.check_duplicate_meth(self.duplicates) + + visit_AsyncFunctionDef = visit_FunctionDef + + def visit_ClassDef(self, node): + with self._visit_new_scope(node) as (previous, new): + pass + +class Scope: + def __init__(self, node, previous): + self.node = node + self.previous = previous + self.type = node.__class__.__name__ + self.methods = [] + if previous is None: + self.id = [node.name] + else: + self.id = previous.id + [node.name] + + def check_duplicate_meth(self, duplicates): + node = self.node + name = node.name + # Skip property and generic methods. + if 'decorator_list' in node._fields and node.decorator_list: + for decorator in node.decorator_list: + if (isinstance(decorator, ast.Attribute) and + 'attr' in decorator._fields and + decorator.attr in + ('setter', 'getter', 'deleter', 'register')): + return + # A functools single-dispatch method. 
+ elif (isinstance(decorator, ast.Call) and + 'func' in decorator._fields and + 'attr' in decorator.func._fields and + decorator.func.attr == 'register'): + return + + if name in self.previous.methods: + duplicates.append((self.id, node.lineno)) + else: + self.previous.methods.append(name) + +def get_duplicates(source, fname): + duplicates = [] + tree = ast.parse(source, fname) + ClassFunctionVisitor(duplicates).visit(tree) + return duplicates + +def duplicates_exist(source, fname='', dups_to_ignore=[]): + """Print duplicates and return True if duplicates exist. + + >>> test_basic = ''' + ... class C: + ... def foo(self): pass + ... def foo(self): pass + ... ''' + >>> duplicates_exist(test_basic) + :4 C.foo + True + + >>> test_asyncdef = ''' + ... class C: + ... async def foo(self): pass + ... async def foo(self): pass + ... def foo(self): pass + ... ''' + >>> duplicates_exist(test_asyncdef) + :4 C.foo + :5 C.foo + True + + >>> test_ignore = ''' + ... class C: + ... def foo(self): pass + ... def foo(self): pass + ... ''' + >>> duplicates_exist(test_ignore, dups_to_ignore=['C.foo']) + False + + >>> test_nested = ''' + ... def foo(): + ... def bar(): pass + ... + ... class C: + ... def foo(self): pass + ... def bar(self): pass + ... def bar(self): pass + ... + ... class D: + ... def foo(self): pass + ... def bar(self): pass + ... def bar(self): pass + ... + ... def bar(self): pass + ... + ... def bar(): pass + ... ''' + >>> duplicates_exist(test_nested) + :8 foo.C.bar + :13 foo.C.D.bar + :15 foo.C.bar + True + + >>> test_generic = ''' + ... class A: + ... @functools.singledispatchmethod + ... def t(self, arg): + ... self.arg = "base" + ... @t.register(int) + ... def _(self, arg): + ... self.arg = "int" + ... @t.register(str) + ... def _(self, arg): + ... self.arg = "str" + ... + ... class C: + ... @property + ... def foo(self): pass + ... + ... @foo.setter + ... def foo(self, a, b): pass + ... + ... def bar(self): pass + ... def bar(self): pass + ... + ... 
class D: + ... @property + ... def foo(self): pass + ... + ... @foo.setter + ... def foo(self, a, b): pass + ... + ... def bar(self): pass + ... def bar(self): pass + ... def foo(self): pass + ... ''' + >>> duplicates_exist(test_generic) + :21 C.bar + :31 C.D.bar + :32 C.D.foo + True + """ + + cnt = 0 + duplicates = get_duplicates(source, fname) + if duplicates: + for (id, lineno) in duplicates: + id = '.'.join(id) + if dups_to_ignore is None or id not in dups_to_ignore: + print('%s:%d %s' % (fname, lineno, id)) + cnt += 1 + return False if cnt == 0 else True + +def iter_modules(paths): + for f in paths: + if os.path.isdir(f): + for path, dirs, filenames in os.walk(f): + for fname in filenames: + if os.path.splitext(fname)[1] == '.py': + yield os.path.normpath(os.path.join(path, fname)) + else: + yield os.path.normpath(f) + +def ignored_dups(f): + """Parse the ignore duplicates file.""" + + # Comments or empty lines. + comments = re.compile(r'^\s*#.*$|^\s*$') + + # Mapping of filename to the list of duplicates to ignore. 
+ ignored = {} + if f is None: + return ignored + with f: + for item in (l.rstrip(': \n\t').split(':') for l in f + if not comments.match(l)): + path = item[0].strip() + if not path: + continue + path = os.path.normpath(path) + if len(item) == 1: + ignored[path] = [] + else: + dupname = item[1].strip() + if path in ignored: + ignored[path].append(dupname) + else: + ignored[path] = [dupname] + return ignored + +def parse_args(argv): + parser = argparse.ArgumentParser(description=__doc__.strip()) + parser.add_argument('files', metavar='path', nargs='*', + help='python module or directory pathname - when a directory, ' + 'the python modules in the directory tree are searched ' + 'recursively for duplicates.') + parser.add_argument('-i', '--ignore', metavar='fname', type=open, + help='ignore duplicates or files listed in , see' + ' Tools/scripts/duplicates_ignored.txt for the' + ' specification of the format of the entries in this file') + parser.add_argument('-x', '--exclude', metavar='prefixes', + help="comma separated list of path names prefixes to exclude" + " (default: '%(default)s')") + parser.add_argument('-t', '--test', action='store_true', + help='run the doctests') + return parser.parse_args(argv) + +def _main(args): + ignored = ignored_dups(args.ignore) + if args.exclude: + prefixes = [p.strip() for p in args.exclude.split(',')] + else: + prefixes = [] + + # Find duplicates. + rc = 0 + for fname in iter_modules(args.files): + def excluded(f): + for p in prefixes: + if f.startswith(p): + return True + + # Skip files whose prefix is excluded or that are configured in the + # '--ignore' file. 
+ if excluded(fname): + continue + dups_to_ignore = ignored.get(fname) + if dups_to_ignore == []: + continue + + try: + with tokenize_open(fname) as f: + if duplicates_exist(f.read(), fname, dups_to_ignore): + rc = 1 + except (UnicodeDecodeError, SyntaxError) as e: + print('%s: %s' % (fname, e), file=sys.stderr) + traceback.print_tb(sys.exc_info()[2]) + rc = 1 + + return rc + +def main(argv): + args = parse_args(argv) + if args.test: + import doctest + doctest.testmod() + else: + return _main(args) + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) diff --git a/Tools/scripts/duplicates_ignored.txt b/Tools/scripts/duplicates_ignored.txt new file mode 100644 index 00000000000000..ecefb15d126c0e --- /dev/null +++ b/Tools/scripts/duplicates_ignored.txt @@ -0,0 +1,67 @@ +# Files or duplicates ignored by Tools/scripts/duplicate_meth_defs.py +# when run with the '--ignore' command line option. +# +# Format: +# filename[: mfqn] +# +# The whole file is ignored when mfqn (method fully qualified name) is +# missing. Use instead the '--exclude' command line option of +# duplicate_meth_defs.py to ignore a directory tree. + +# Files with bad encoding or bad syntax. +Lib/lib2to3/tests/data/bom.py +Lib/lib2to3/tests/data/crlf.py +Lib/lib2to3/tests/data/different_encoding.py +Lib/lib2to3/tests/data/false_encoding.py +Lib/lib2to3/tests/data/py2_test_grammar.py +Lib/test/bad_coding.py +Lib/test/bad_coding2.py +Lib/test/badsyntax_pep3120.py +Lib/test/badsyntax_3131.py +Tools/test2to3/maintest.py +Tools/test2to3/test2to3/hello.py +Tools/test2to3/test/test_foo.py + +# False positives. 
+Lib/multiprocessing/connection.py: Connection._close +Lib/pathlib.py: _NormalAccessor.symlink +Lib/socket.py: socket._sendfile_use_sendfile +Lib/socket.py: socket.get_inheritable +Lib/socket.py: socket.set_inheritable +Lib/ssl.py: SSLContext.hostname_checks_common_name +Lib/subprocess.py: Popen._get_handles +Lib/subprocess.py: Popen._execute_child +Lib/subprocess.py: Popen._internal_poll +Lib/subprocess.py: Popen._wait +Lib/subprocess.py: Popen._communicate +Lib/subprocess.py: Popen.send_signal +Lib/subprocess.py: Popen.terminate +Lib/tempfile.py: _TemporaryFileCloser.close +Lib/test/test_socket.py: InterruptedTimeoutBase.setAlarm +Lib/test/test_pathlib.py: _BasePathTest.dirlink + + +# Duplicates that must be fixed and that are temporarily ignored. +# Issue bpo-19113 +Lib/ctypes/test/test_functions.py: FunctionTestCase.test_errors + +# Issue bpo-36678 +Lib/test/test_dataclasses.py: TestCase.test_helper_asdict_builtin_containers +Lib/test/test_dataclasses.py: TestCase.test_helper_astuple_builtin_containers +Lib/test/test_dataclasses.py: TestCase.test_not_tuple +Lib/test/test_dataclasses.py: TestReplace.test_recursive_repr_two_attrs + +# Issue bpo-36679 +Lib/test/test_genericclass.py: TestClassGetitem.test_class_getitem + +# Issue bpo-19119 +Lib/test/test_heapq.py: TestErrorHandling.test_get_only + +# Issue bpo-36680 +Lib/test/test_importlib/test_util.py: PEP3147Tests.test_source_from_cache_path_like_arg + +# Issue bpo-36681 +Lib/test/test_logging.py: BuiltinLevelsTest.test_regression_29220 + +# Issue bpo-36683 +Lib/test/test_utf8_mode.py: UTF8ModeTests.test_io_encoding diff --git a/Tools/scripts/patchcheck.py b/Tools/scripts/patchcheck.py index 8a8480a0c22b37..e19517cbf9a7c4 100755 --- a/Tools/scripts/patchcheck.py +++ b/Tools/scripts/patchcheck.py @@ -6,9 +6,13 @@ import os.path import subprocess import sysconfig +import functools import reindent import untabify +from io import StringIO +from test.support import change_cwd +from duplicate_meth_defs import main as 
find_duplicates # Excluded directories which are copies of external libraries: @@ -21,9 +25,9 @@ SRCDIR = sysconfig.get_config_var('srcdir') -def n_files_str(count): - """Return 'N file(s)' with the proper plurality on 'file'.""" - return "{} file{}".format(count, "s" if count != 1 else "") +def n_names_str(count, name=None): + """Return 'N name(s)' with the proper plurality on 'name'.""" + return "{} {}{}".format(count, name, "s" if count > 1 else "") def status(message, modal=False, info=None): @@ -90,7 +94,7 @@ def get_base_branch(): @status("Getting the list of files that have been added/changed", - info=lambda x: n_files_str(len(x))) + info=lambda x: n_names_str(len(x), 'file')) def changed_files(base_branch=None): """Get the list of changed or added files from git.""" if os.path.exists(os.path.join(SRCDIR, '.git')): @@ -131,15 +135,24 @@ def changed_files(base_branch=None): return filenames2 -def report_modified_files(file_paths): - count = len(file_paths) +def report_modified(lines, name=None): + count = len(lines) if count == 0: - return n_files_str(count) + return n_names_str(count, name) else: - lines = ["{}:".format(n_files_str(count))] - for path in file_paths: - lines.append(" {}".format(path)) - return "\n".join(lines) + out = ["{}:".format(n_names_str(count, name))] + for line in lines: + out.append(" {}".format(line)) + return "\n".join(out) + +report_modified_files = functools.partial(report_modified, name='file') + +def report_modified_dups(args): + got_exception, dups = args + if got_exception: + return '' + else: + return report_modified(dups, 'duplicate') @status("Fixing Python file whitespace", info=report_modified_files) @@ -220,6 +233,23 @@ def regenerated_pyconfig_h_in(file_paths): else: return "not needed" +@status("Duplicate method definitions", info=report_modified_dups) +def duplicates_found(python_files): + argv = ['-i', + os.path.join(SRCDIR, 'Tools/scripts/duplicates_ignored.txt')] + argv.extend(python_files) + save_stdout = 
sys.stdout + try: + sys.stdout = StringIO() + with change_cwd(SRCDIR): + rc = find_duplicates(argv) + output = sys.stdout.getvalue() + finally: + sys.stdout = save_stdout + output = output.strip() + got_exception = True if not output and rc else False + return got_exception, output.split('\n') if output else [] + def travis(pull_request): if pull_request == 'false': print('Not a pull request; skipping') @@ -230,6 +260,9 @@ def travis(pull_request): c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))] doc_files = [fn for fn in file_paths if fn.startswith('Doc') and fn.endswith(('.rst', '.inc'))] + got_exception, duplicates = duplicates_found(python_files) + if duplicates: + print('Please fix the duplicate method definitions found') fixed = [] fixed.extend(normalize_whitespace(python_files)) fixed.extend(normalize_c_whitespace(c_files)) @@ -239,6 +272,7 @@ def travis(pull_request): else: print(f'Please fix the {len(fixed)} file(s) with whitespace issues') print('(on UNIX you can run `make patchcheck` to make the fixes)') + if fixed or duplicates or got_exception : sys.exit(1) def main():