10000 ENH Fix unfriendly error message for documentation checks (#11967) · rth/scikit-learn@8681ece · GitHub
[go: up one dir, main page]

Skip to content

Commit 8681ece

Browse files
chibby0nerth
authored andcommitted
ENH Fix unfriendly error message for documentation checks (scikit-learn#11967)
Previously error messages for mismatches parameters on function/method signatures and docstrings were not explicit or user-friendly. This changes provides a more explicit error message and in addition notifies the relation of missing/mismatching parameters from signature to docstring and from docstring to signature.
1 parent ee8bdd0 commit 8681ece

File tree

3 files changed

+179
-45
lines changed

3 files changed

+179
-45
lines changed

sklearn/tests/test_docstring_parameters.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
path=sklearn.__path__)
2424
if not ("._" in pckg[1] or ".tests." in pckg[1])])
2525

26-
2726
# functions to ignore args / docstring of
2827
_DOCSTRING_IGNORES = [
2928
'sklearn.utils.deprecation.load_mlcomp',
@@ -73,7 +72,7 @@ def test_docstring_parameters():
7372
this_incorrect = []
7473
if cname in _DOCSTRING_IGNORES or cname.startswith('_'):
7574
continue
76-
if isabstract(cls):
75+
if inspect.isabstract(cls):
7776
continue
7877
with warnings.catch_warnings(record=True) as w:
7978
cdoc = docscrape.ClassDoc(cls)
@@ -85,10 +84,10 @@ def test_docstring_parameters():
8584

8685
if _is_deprecated(cls_init):
8786
continue
88-
8987
elif cls_init is not None:
9088
this_incorrect += check_docstri 8000 ng_parameters(
91-
cls.__init__, cdoc, class_name=cname)
89+
cls.__init__, cdoc)
90+
9291
for method_name in cdoc.methods:
9392
method = getattr(cls, method_name)
9493
if _is_deprecated(method):
@@ -102,7 +101,7 @@ def test_docstring_parameters():
102101
sig.parameters['y'].default is None):
103102
param_ignore = ['y'] # ignore y for fit and score
104103
result = check_docstring_parameters(
105-
method, ignore=param_ignore, class_name=cname)
104+
method, ignore=param_ignore)
106105
this_incorrect += result
107106

108107
incorrect += this_incorrect
@@ -120,9 +119,10 @@ def test_docstring_parameters():
120119
if (not any(d in name_ for d in _DOCSTRING_IGNORES) and
121120
not _is_deprecated(func)):
122121
incorrect += check_docstring_parameters(func)
123-
msg = '\n' + '\n'.join(sorted(list(set(incorrect))))
122+
123+
msg = '\n'.join(incorrect)
124124
if len(incorrect) > 0:
125-
raise AssertionError("Docstring Error: " + msg)
125+
raise AssertionError("Docstring Error:\n" + msg)
126126

127127

128128
@ignore_warnings(category=DeprecationWarning)
@@ -138,7 +138,7 @@ def test_tabs():
138138
# because we don't import
139139
mod = importlib.import_module(modname)
140140
try:
141-
source = getsource(mod)
141+
source = inspect.getsource(mod)
142142
except IOError: # user probably should have run "make clean"
143143
continue
144144
assert '\t' not in source, ('"%s" has tabs, please remove them ',

sklearn/utils/testing.py

Lines changed: 70 additions & 26 deletions
9E88
Original file line numberDiff line numberDiff line change
@@ -697,16 +697,13 @@ def _get_args(function, varargs=False):
697697
return args
698698

699699

700-
def _get_func_name(func, class_name=None):
700+
def _get_func_name(func):
701701
"""Get function full name
702702
703703
Parameters
704704
----------
705705
func : callable
706706
The function object.
707-
class_name : string, optional (default: None)
708-
If ``func`` is a class method and the class name is known specify
709-
class_name for the error message.
710707
711708
Returns
712709
-------
@@ -717,16 +714,16 @@ def _get_func_name(func, class_name=None):
717714
module = inspect.getmodule(func)
718715
if module:
719716
parts.append(module.__name__)
720-
if class_name is not None:
721-
parts.append(class_name)
722-
elif hasattr(func, 'im_class'):
723-
parts.append(func.im_class.__name__)
717+
718+
qualname = func.__qualname__
719+
if qualname != func.__name__:
720+
parts.append(qualname[:qualname.find('.')])
724721

725722
parts.append(func.__name__)
726723
return '.'.join(parts)
727724

728725

729-
def check_docstring_parameters(func, doc=None, ignore=None, class_name=None):
726+
def check_docstring_parameters(func, doc=None, ignore=None):
730727
"""Helper to check docstring
731728
732729
Parameters
@@ -737,9 +734,6 @@ def check_docstring_parameters(func, doc=None, ignore=None, class_name=None):
737734
Docstring if it is passed manually to the test.
738735
ignore : None | list
739736
Parameters to ignore.
740-
class_name : string, optional (default: None)
741-
If ``func`` is a class method and the class name is known specify
742-
class_name for the error message.
743737
744738
Returns
745739
-------
@@ -750,7 +744,7 @@ def check_docstring_parameters(func, doc=None, ignore=None, class_name=None):
750744
incorrect = []
751745
ignore = [] if ignore is None else ignore
752746

753-
func_name = _get_func_name(func, class_name=class_name)
747+
func_name = _get_func_name(func)
754748
if (not func_name.startswith('sklearn.') or
755749
func_name.startswith('sklearn.externals')):
756750
return incorrect
@@ -763,11 +757,13 @@ def check_docstring_parameters(func, doc=None, ignore=None, class_name=None):
763757
# Dont check estimator_checks module
764758
if func_name.split('.')[2] == 'estimator_checks':
765759
return incorrect
766-
args = list(filter(lambda x: x not in ignore, _get_args(func)))
760+
# Get the arguments from the function signature
761+
param_signature = list(filter(lambda x: x not in ignore, _get_args(func)))
767762
# drop self
768-
if len(args) > 0 and args[0] == 'self':
769-
args.remove('self')
763+
if len(param_signature) > 0 and param_signature[0] == 'self':
764+ 10000
param_signature.remove('self')
770765

766+
# Analyze function's docstring
771767
if doc is None:
772768
with warnings.catch_warnings(record=True) as w:
773769
try:
@@ -778,8 +774,9 @@ def check_docstring_parameters(func, doc=None, ignore=None, class_name=None):
778774
if len(w):
779775
raise RuntimeError('Error for %s:\n%s' % (func_name, w[0]))
780776

781-
param_names = []
777+
param_docs = []
782778
for name, type_definition, param_doc in doc['Parameters']:
779+
# Type hints are empty only if parameter name ended with :
783780
if not type_definition.strip():
784781
if ':' in name and name[:name.index(':')][-1:].strip():
785782
incorrect += [func_name +
@@ -790,18 +787,65 @@ def check_docstring_parameters(func, doc=None, ignore=None, class_name=None):
790787
' Parameter %r has an empty type spec. '
791788
'Remove the colon' % (name.lstrip())]
792789

790+
# Create a list of parameters to compare with the parameters gotten
791+
# from the func signature
793792
if '*' not in name:
794-
param_names.append(name.split(':')[0].strip('` '))
793+
param_docs.append(name.split(':')[0].strip('` '))
795794

796-
param_names = list(filter(lambda x: x not in ignore, param_names))
795+
# If one of the docstring's parameters had an error then return that
796+
# incorrect message
797+
if len(incorrect) > 0:
798+
return incorrect
799+
800+
# Remove the parameters that should be ignored from list
801+
param_docs = list(filter(lambda x: x not in ignore, param_docs))
802+
803+
# The following is derived from pytest, Copyright (c) 2004-2017 Holger
804+
# Krekel and others, Licensed under MIT License. See
805+
# https://github.com/pytest-dev/pytest
806+
807+
message = []
808+
for i in range(min(len(param_docs), len(param_signature))):
809+
if param_signature[i] != param_docs[i]:
810+
message += ["There's a parameter name mismatch in function"
811+
" docstring w.r.t. function signature, at index %s"
812+
" diff: %r != %r" %
813+
(i, param_signature[i], param_docs[i])]
814+
break
815+
if len(param_signature) > len(param_docs):
816+
message += ["Parameters in function docstring have less items w.r.t."
817+
" function signature, first missing item: %s" %
818+
param_signature[len(param_docs)]]
819+
820+
elif len(param_signature) < len(param_docs):
821+
message += ["Parameters in function docstring have more items w.r.t."
822+
" function signature, first extra item: %s" %
823+
param_docs[len(param_signature)]]
824+
825+
# If there wasn't any difference in the parameters themselves between
826+
# docstring and signature including having the same length then return
827+
# empty list
828+
if len(message) == 0:
829+
return []
830+
831+
import difflib
832+
import pprint
833+
834+
param_docs_formatted = pprint.pformat(param_docs).splitlines()
835+
param_signature_formatted = pprint.pformat(param_signature).splitlines()
836+
837+
message += ["Full diff:"]
838+
839+
message.extend(
840+
line.strip() for line in difflib.ndiff(param_signature_formatted,
841+
param_docs_formatted)
842+
)
843+
844+
incorrect.extend(message)
845+
846+
# Prepend function name
847+
incorrect = ['In function: ' + func_name] + incorrect
797848

798-
if len(param_names) != len(args):
799-
bad = str(sorted(list(set(param_names) ^ set(args))))
800-
incorrect += [func_name + ' arg mismatch: ' + bad]
801-
else:
802-
for n1, n2 in zip(param_names, args):
803-
if n1 != n2:
804-
incorrect += [func_name + ' ' + n1 + ' != ' + n2]
805849
return incorrect
806850

807851

sklearn/utils/tests/test_testing.py

Lines changed: 101 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,27 @@ def f_bad_order(b, a):
323323
return c
324324

325325

326+
def f_too_many_param_docstring(a, b):
327+
"""Function f
328+
329+
Parameters
330+
----------
331+
a : int
332+
Parameter a
333+
b : int
334+
Parameter b
335+
c : int
336+
Parameter c
337+
338+
Returns
339+
-------
340+
d : list
341+
Parameter c
342+
"""
343+
d = a + b
344+
return d
345+
346+
326347
def f_missing(a, b):
327348
"""Function f
328349
@@ -469,29 +490,98 @@ def test_check_docstring_parameters():
469490
incorrect == [
470491
"sklearn.utils.tests.test_testing.f_check_param_definition There "
471492
"was no space between the param name and colon ('a: int')",
493+
472494
"sklearn.utils.tests.test_testing.f_check_param_definition There "
473495
"was no space between the param name and colon ('b:')",
496+
474497
"sklearn.utils.tests.test_testing.f_check_param_definition "
475498
"Parameter 'c :' has an empty type spec. Remove the colon",
499+
476500
"sklearn.utils.tests.test_testing.f_check_param_definition There "
477501
"was no space between the param name and colon ('d:int')",
478502
])
479503

480-
messages = ["a != b", "arg mismatch: ['b']", "arg mismatch: ['X', 'y']",
481-
"predict y != X",
482-
"predict_proba arg mismatch: ['X']",
483-
"score arg mismatch: ['X']",
484-
".fit arg mismatch: ['X', 'y']"]
504+
messages = [
505+
["In function: sklearn.utils.tests.test_testing.f_bad_order",
506+
"There's a parameter name mismatch in function docstring w.r.t."
507+
" function signature, at index 0 diff: 'b' != 'a'",
508+
"Full diff:",
509+
"- ['b', 'a']",
510+
"+ ['a', 'b']"],
511+
512+
["In function: " +
513+
"sklearn.utils.tests.test_testing.f_too_many_param_docstring",
514+
"Parameters in function docstring have more items w.r.t. function"
515+
" signature, first extra item: c",
516+
"Full diff:",
517+
"- ['a', 'b']",
518+
"+ ['a', 'b', 'c']",
519+
"? +++++"],
520+
521+
["In function: sklearn.utils.tests.test_testing.f_missing",
522+
"Parameters in function docstring have less items w.r.t. function"
523+
" signature, first missing item: b",
524+
"Full diff:",
525+
"- ['a', 'b']",
526+
"+ ['a']"],
527+
528+
["In function: sklearn.utils.tests.test_testing.Klass.f_missing",
529+
"Parameters in function docstring have less items w.r.t. function"
530+
" signature, first missing item: X",
531+
"Full diff:",
532+
"- ['X', 'y']",
533+
"+ []"],
534+
535+
["In function: " +
536+
"sklearn.utils.tests.test_testing.MockMetaEstimator.predict",
537+
"There's a parameter name mismatch in function docstring w.r.t."
538+
" function signature, at index 0 diff: 'X' != 'y'",
539+
"Full diff:",
540+
"- ['X']",
541+
"? ^",
542+
"+ ['y']",
543+
"? ^"],
544+
545+
["In function: " +
546+
"sklearn.utils.tests.test_testing.MockMetaEstimator."
547+
+ "predict_proba",
548+
"Parameters in function docstring have less items w.r.t. function"
549+
" signature, first missing item: X",
550+
"Full diff:",
551+
"- ['X']",
552+
"+ []"],
553+
554+
["In function: " +
555+
"sklearn.utils.tests.test_testing.MockMetaEstimator.score",
556+
"Parameters in function docstring have less items w.r.t. function"
557+
" signature, first missing item: X",
558+
"Full diff:",
559+
"- ['X']",
560+
"+ []"],
561+
562+
["In function: " +
563+
"sklearn.utils.tests.test_testing.MockMetaEstimator.fit",
564+
"Parameters in function docstring have less items w.r.t. function"
565+
" signature, first missing item: X",
566+
"Full diff:",
567+
"- ['X', 'y']",
568+
"+ []"],
569+
570+
]
485571

486572
mock_meta = MockMetaEstimator(delegate=MockEst())
487573

488-
for mess, f in zip(messages,
489-
[f_bad_order, f_missing, Klass.f_missing,
490-
mock_meta.predict, mock_meta.predict_proba,
491-
mock_meta.score, mock_meta.fit]):
574+
for msg, f in zip(messages,
575+
[f_bad_order,
576+
f_too_many_param_docstring,
577+
f_missing,
578+
Klass.f_missing,
579+
mock_meta.predict,
580+
mock_meta.predict_proba,
581+
mock_meta.score,
582+
mock_meta.fit]):
492583
incorrect = check_docstring_parameters(f)
493-
assert len(incorrect) >= 1
494-
assert mess in incorrect[0], '"%s" not in "%s"' % (mess, incorrect[0])
584+
assert msg == incorrect, ('\n"%s"\n not in \n"%s"' % (msg, incorrect))
495585

496586

497587
class RegistrationCounter:

0 commit comments

Comments
 (0)
0