diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 7cb0b5e923071..d5d97f8d3f472 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. DOCSTRING_IGNORE_LIST = [ - "GridSearchCV", "HalvingGridSearchCV", "HalvingRandomSearchCV", "HashingVectorizer", diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 0ff04e94298a6..746ec5e9a0813 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -432,7 +432,7 @@ def _pairwise(self): return getattr(self.estimator, "_pairwise", False) def score(self, X, y=None): - """Returns the score on the given data, if the estimator has been refit. + """Return the score on the given data, if the estimator has been refit. This uses the score defined by ``scoring`` where provided, and the ``best_estimator_.score`` method otherwise. @@ -451,6 +451,8 @@ def score(self, X, y=None): Returns ------- score : float + The score defined by ``scoring`` if provided, and by the + ``best_estimator_.score`` method otherwise. """ _check_refit(self, "score") check_is_fitted(self) @@ -491,6 +493,7 @@ def score_samples(self, X): Returns ------- y_score : ndarray of shape (n_samples,) + The result of ``best_estimator_.score_samples`` on `X`. """ check_is_fitted(self) return self.best_estimator_.score_samples(X) @@ -508,6 +511,11 @@ def predict(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + y_pred : ndarray of shape (n_samples,) + The predicted labels or values for `X` based on the estimator with + the best found parameters. """ check_is_fitted(self) return self.best_estimator_.predict(X) @@ -525,6 +533,12 @@ def predict_proba(self, X): Must fulfill the input assumptions of the underlying estimator. 
+ Returns + ------- + y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes) + Predicted class probabilities for `X` based on the estimator with + the best found parameters. The order of the classes corresponds + to that in the fitted attribute :term:`classes_`. """ check_is_fitted(self) return self.best_estimator_.predict_proba(X) @@ -542,6 +556,12 @@ def predict_log_proba(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes) + Predicted class log-probabilities for `X` based on the estimator + with the best found parameters. The order of the classes + corresponds to that in the fitted attribute :term:`classes_`. """ check_is_fitted(self) return self.best_estimator_.predict_log_proba(X) @@ -559,6 +579,12 @@ def decision_function(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + y_score : ndarray of shape (n_samples,) or (n_samples, n_classes) \ + or (n_samples, n_classes * (n_classes-1) / 2) + Result of the decision function for `X` based on the estimator with + the best found parameters. """ check_is_fitted(self) return self.best_estimator_.decision_function(X) @@ -576,6 +602,11 @@ def transform(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) + `X` transformed in the new space based on the estimator with + the best found parameters. """ check_is_fitted(self) return self.best_estimator_.transform(X) @@ -593,12 +624,21 @@ def inverse_transform(self, Xt): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + X : {ndarray, sparse matrix} of shape (n_samples, n_features) + Result of the `inverse_transform` function for `Xt` based on the + estimator with the best found parameters. 
""" check_is_fitted(self) return self.best_estimator_.inverse_transform(Xt) @property def n_features_in_(self): + """Number of features seen during :term:`fit`. + + Only available when `refit=True`. + """ # For consistency with other estimators we raise a AttributeError so # that hasattr() fails if the search estimator isn't fitted. try: @@ -614,6 +654,10 @@ def n_features_in_(self): @property def classes_(self): + """Class labels. + + Only available when `refit=True` and the estimator is a classifier. + """ _estimator_has("classes_")(self) return self.best_estimator_.classes_ @@ -733,7 +777,12 @@ def fit(self, X, y=None, *, groups=None, **fit_params): instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). **fit_params : dict of str -> object - Parameters passed to the ``fit`` method of the estimator + Parameters passed to the ``fit`` method of the estimator. + + Returns + ------- + self : object + Instance of fitted estimator. """ estimator = self.estimator refit_metric = "score" @@ -1002,7 +1051,7 @@ class GridSearchCV(BaseSearchCV): Parameters ---------- - estimator : estimator object. + estimator : estimator object This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed. @@ -1137,25 +1186,6 @@ class GridSearchCV(BaseSearchCV): .. versionchanged:: 0.21 Default value was changed from ``True`` to ``False`` - - Examples - -------- - >>> from sklearn import svm, datasets - >>> from sklearn.model_selection import GridSearchCV - >>> iris = datasets.load_iris() - >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]} - >>> svc = svm.SVC() - >>> clf = GridSearchCV(svc, parameters) - >>> clf.fit(iris.data, iris.target) - GridSearchCV(estimator=SVC(), - param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')}) - >>> sorted(clf.cv_results_.keys()) - ['mean_fit_time', 'mean_score_time', 'mean_test_score',... - 'param_C', 'param_kernel', 'params',... 
- 'rank_test_score', 'split0_test_score',... - 'split2_test_score', ... - 'std_fit_time', 'std_score_time', 'std_test_score'] - Attributes ---------- cv_results_ : dict of numpy (masked) ndarrays @@ -1308,6 +1338,23 @@ class GridSearchCV(BaseSearchCV): sklearn.metrics.make_scorer : Make a scorer from a performance metric or loss function. + Examples + -------- + >>> from sklearn import svm, datasets + >>> from sklearn.model_selection import GridSearchCV + >>> iris = datasets.load_iris() + >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]} + >>> svc = svm.SVC() + >>> clf = GridSearchCV(svc, parameters) + >>> clf.fit(iris.data, iris.target) + GridSearchCV(estimator=SVC(), + param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')}) + >>> sorted(clf.cv_results_.keys()) + ['mean_fit_time', 'mean_score_time', 'mean_test_score',... + 'param_C', 'param_kernel', 'params',... + 'rank_test_score', 'split0_test_score',... + 'split2_test_score', ... + 'std_fit_time', 'std_score_time', 'std_test_score'] """ _required_parameters = ["estimator", "param_grid"]