Closed
Description
As discussed in #13385, we need to ensure that all fitted attributes are documented.
If you want to work on this, pick a specific submodule and fix all the attribute documentation mismatches in that submodule.
Here's a script to find the remaining mismatches (there might be some false positives):
import numpy as np
from sklearn.base import clone
from sklearn.utils.testing import all_estimators
from sklearn.utils.estimator_checks import pairwise_estimator_convert_X, enforce_estimator_tags_y
from numpydoc import docscrape
# Survey every estimator returned by ``all_estimators()`` and report, per
# estimator, the names that appear in only one of:
#   * the public fitted attributes (instance attributes ending in "_" and
#     not starting with "_") present after ``fit``;
#   * the "Attributes" section of the class docstring, as parsed by numpydoc.
# Estimators that cannot be instantiated without arguments, or that fail to
# fit on the synthetic data below, are skipped.
ests = all_estimators()
for name, Est in ests:
    try:
        estimator_orig = Est()
    except Exception:
        # ``except Exception`` (not a bare ``except``) so that Ctrl-C and
        # SystemExit still stop the scan.
        continue

    rng = np.random.RandomState(0)
    X = pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)
    X = X.astype(object)
    # Builtin ``int`` replaces the ``np.int`` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the two are equivalent.
    y = (X[:, 0] * 4).astype(int)
    est = clone(estimator_orig)
    y = enforce_estimator_tags_y(est, y)
    try:
        est.fit(X, y)
    except Exception:
        continue

    # Names of the public attributes set on the instance that end in "_".
    fitted_attrs_names = [
        attr for attr in est.__dict__
        if attr.endswith("_") and not attr.startswith("_")
    ]

    doc = docscrape.ClassDoc(type(est))
    doc_attributes = []
    incorrect = []
    for att_name, type_definition, _ in doc['Attributes']:
        if not type_definition.strip():
            # With no parsed type, a colon left in the name means the
            # "name : type" separator was malformed.
            if ':' in att_name and att_name[:att_name.index(':')][-1:].strip():
                incorrect.append(
                    name +
                    ' There was no space between the param name and '
                    'colon (%r)' % att_name)
            # Check the attribute name, not the estimator name, for a
            # trailing colon.
            elif att_name.rstrip().endswith(':'):
                incorrect.append(
                    name +
                    ' Parameter %r has an empty type spec. '
                    'Remove the colon' % (att_name.lstrip()))

        # Entries containing "*" are not compared against fitted attributes.
        if '*' not in att_name:
            doc_attributes.append(att_name.split(':')[0].strip('` '))

    # Report formatting problems and keep scanning, rather than aborting the
    # whole survey on the first one via ``assert``.
    for problem in incorrect:
        print("Docstring Error: " + problem)

    # Symmetric difference: fitted but undocumented, or documented but not
    # set by ``fit``.
    bad = sorted(set(fitted_attrs_names) ^ set(doc_attributes))
    if bad:
        msg = '{}\n'.format(name) + '\n'.join(bad)
        print("Docstring Error: Attribute mismatch in " + msg)