From 5060a2567759d4c3b8a093490928e1c7a78aeaff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Mon, 13 Sep 2021 15:09:18 +0200
Subject: [PATCH 1/6] Raise an error when all fits failed in cross-validation
 or grid-search

---
 sklearn/model_selection/_search.py            |  4 +-
 sklearn/model_selection/_validation.py        | 35 ++++++-----
 sklearn/model_selection/tests/test_search.py  | 37 ++++++++++--
 .../model_selection/tests/test_validation.py  | 60 ++++++++++++++-----
 4 files changed, 100 insertions(+), 36 deletions(-)

diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index c13e5b6643ce1..b5e0a52e238fc 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -31,7 +31,7 @@
 from ._validation import _aggregate_score_dicts
 from ._validation import _insert_error_scores
 from ._validation import _normalize_score_results
-from ._validation import _warn_about_fit_failures
+from ._validation import _warn_or_raise_about_fit_failures
 from ..exceptions import NotFittedError
 from joblib import Parallel
 from ..utils import check_random_state
@@ -865,7 +865,7 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None):
                         "splits, got {}".format(n_splits, len(out) // n_candidates)
                     )
 
-                _warn_about_fit_failures(out, self.error_score)
+                _warn_or_raise_about_fit_failures(out, self.error_score)
 
                 # For callable self.scoring, the return type is only know after
                 # calling. If the return type is a dictionary, the error scores
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 760418b7d8f54..015ec5d43db79 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -283,7 +283,7 @@ def cross_validate(
         for train, test in cv.split(X, y, groups)
     )
 
-    _warn_about_fit_failures(results, error_score)
+    _warn_or_raise_about_fit_failures(results, error_score)
 
     # For callabe scoring, the return type is only know after calling. If the
     # return type is a dictionary, the error scores can now be inserted with
@@ -327,9 +327,6 @@ def _insert_error_scores(results, error_score):
         elif successful_score is None:
             successful_score = result["test_scores"]
 
-    if successful_score is None:
-        raise NotFittedError("All estimators failed to fit")
-
     if isinstance(successful_score, dict):
         formatted_error = {name: error_score for name in successful_score}
         for i in failed_indices:
@@ -347,7 +344,7 @@ def _normalize_score_results(scores, scaler_score_key="score"):
     return {scaler_score_key: scores}
 
 
-def _warn_about_fit_failures(results, error_score):
+def _warn_or_raise_about_fit_failures(results, error_score):
     fit_errors = [
         result["fit_error"] for result in results if result["fit_error"] is not None
     ]
@@ -361,15 +358,25 @@ def _warn_about_fit_failures(results, error_score):
             for error, n in fit_errors_counter.items()
         )
 
-        some_fits_failed_message = (
-            f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n"
-            "The score on these train-test partitions for these parameters"
-            f" will be set to {error_score}.\n"
-            "If these failures are not expected, you can try to debug them "
-            "by setting error_score='raise'.\n\n"
-            f"Below are more details about the failures:\n{fit_errors_summary}"
-        )
-        warnings.warn(some_fits_failed_message, FitFailedWarning)
+        if num_failed_fits == num_fits:
+            all_fits_failed_message = (
+                f"\nAll the {num_fits} fits failed.\n"
+                "It is very likely that your model is misconfigured.\n"
+                "You can try to debug the error by setting error_score='raise'.\n\n"
+                f"Below are more details about the failures:\n{fit_errors_summary}"
+            )
+            raise NotFittedError(all_fits_failed_message)
+
+        else:
+            some_fits_failed_message = (
+                f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n"
+                "The score on these train-test partitions for these parameters"
+                f" will be set to {error_score}.\n"
+                "If these failures are not expected, you can try to debug them "
+                "by setting error_score='raise'.\n\n"
+                f"Below are more details about the failures:\n{fit_errors_summary}"
+            )
+            warnings.warn(some_fits_failed_message, FitFailedWarning)
 
 
 def cross_val_score(
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 6960a17fb629b..87c92e3c8cc94 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1628,6 +1628,27 @@ def get_cand_scores(i):
     assert gs.best_index_ != clf.FAILING_PARAMETER
 
 
+def test_grid_search_classifier_all_fits_fail():
+    X, y = make_classification(n_samples=20, n_features=10, random_state=0)
+
+    clf = FailingClassifier()
+
+    gs = GridSearchCV(
+        clf,
+        [{"parameter": [FailingClassifier.FAILING_PARAMETER] * 3}],
+        error_score=0.0,
+    )
+
+    warning_message = re.compile(
+        "All the 15 fits failed.+"
+        "15 fits failed with the following error.+ValueError.+Failing classifier failed"
+        " as required",
+        flags=re.DOTALL,
+    )
+    with pytest.raises(NotFittedError, match=warning_message):
+        gs.fit(X, y)
+
+
 def test_grid_search_failing_classifier_raise():
     # GridSearchCV with on_error == 'raise' raises the error
 
@@ -2130,7 +2151,7 @@ def custom_scorer(est, X, y):
     assert_allclose(gs.cv_results_["mean_test_acc"], [1, 1, 0.1])
 
 
-def test_callable_multimetric_clf_all_fails():
+def test_callable_multimetric_clf_all_fits_fail():
     # Warns and raises when all estimator fails to fit.
     def custom_scorer(est, X, y):
         return {"acc": 1}
@@ -2141,16 +2162,20 @@ def custom_scorer(est, X, y):
 
     gs = GridSearchCV(
         clf,
-        [{"parameter": [2, 2, 2]}],
+        [{"parameter": [FailingClassifier.FAILING_PARAMETER] * 3}],
         scoring=custom_scorer,
         refit=False,
         error_score=0.1,
     )
 
-    with pytest.warns(
-        FitFailedWarning,
-        match="15 fits failed.+total of 15",
-    ), pytest.raises(NotFittedError, match="All estimators failed to fit"):
+    individual_fit_error_message = "ValueError: Failing classifier failed as required"
+    error_message = re.compile(
+        "All the 15 fits failed.+your model is misconfigured.+"
+        f"{individual_fit_error_message}",
+        flags=re.DOTALL,
+    )
+
+    with pytest.raises(NotFittedError, match=error_message):
         gs.fit(X, y)
 
 
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 215ceb5877669..397fb0b5b5d0d 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -11,6 +11,7 @@
 import numpy as np
 from scipy.sparse import coo_matrix, csr_matrix
 from sklearn.exceptions import FitFailedWarning
+from sklearn.exceptions import NotFittedError
 
 from sklearn.model_selection.tests.test_search import FailingClassifier
 
@@ -2130,38 +2131,69 @@ def test_fit_and_score_working():
     assert result["parameters"] == fit_and_score_kwargs["parameters"]
 
 
+class DataDependentFailingClassifier(BaseEstimator):
+    def __init__(self, max_x_value=None):
+        self.max_x_value = max_x_value
+
+    def fit(self, X, y=None):
+        num_values_too_high = (X > self.max_x_value).sum()
+        if num_values_too_high:
+            raise ValueError(
+                f"Classifier fit failed with {num_values_too_high} values too high"
+            )
+
+    def predict(self, X):
+        return np.zeros(X.shape[0])
+
+    def score(self, X=None, Y=None):
+        return 0.0
+
+
 @pytest.mark.parametrize("error_score", [np.nan, 0])
-def test_cross_validate_failing_fits_warnings(error_score):
+def test_cross_validate_some_failing_fits_warning(error_score):
     # Create a failing classifier to deliberately fail
-    failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
+    failing_clf = DataDependentFailingClassifier(max_x_value=8)
     # dummy X data
     X = np.arange(1, 10)
     y = np.ones(9)
-    # fit_and_score_args = [failing_clf, X, None, dict(), None, None, 0, None, None]
     # passing error score to trigger the warning message
     cross_validate_args = [failing_clf, X, y]
-    cross_validate_kwargs = {"cv": 7, "error_score": error_score}
+    cross_validate_kwargs = {"cv": 3, "error_score": error_score}
     # check if the warning message type is as expected
+
+    individual_fit_error_message = (
+        "ValueError: Classifier fit failed with 1 values too high"
+    )
     warning_message = re.compile(
-        "7 fits failed.+total of 7.+The score on these"
+        "2 fits failed.+total of 3.+The score on these"
         " train-test partitions for these parameters will be set to"
-        f" {cross_validate_kwargs['error_score']}.",
+        f" {cross_validate_kwargs['error_score']}.+{individual_fit_error_message}",
         flags=re.DOTALL,
     )
 
     with pytest.warns(FitFailedWarning, match=warning_message):
         cross_validate(*cross_validate_args, **cross_validate_kwargs)
 
-    # since we're using FailingClassfier, our error will be the following
-    error_message = "ValueError: Failing classifier failed as required"
 
-    # check traceback is included
-    warning_message = re.compile(
-        "The score on these train-test partitions for these parameters will be set"
-        f" to {cross_validate_kwargs['error_score']}.+{error_message}",
-        re.DOTALL,
+@pytest.mark.parametrize("error_score", [np.nan, 0])
+def test_cross_validate_all_failing_fits_error(error_score):
+    # Create a failing classifier to deliberately fail
+    failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
+    # dummy X data
+    X = np.arange(1, 10)
+    y = np.ones(9)
+
+    cross_validate_args = [failing_clf, X, y]
+    cross_validate_kwargs = {"cv": 7, "error_score": error_score}
+
+    individual_fit_error_message = "ValueError: Failing classifier failed as required"
+    error_message = re.compile(
+        "All the 7 fits failed.+your model is misconfigured.+"
+        f"{individual_fit_error_message}",
+        flags=re.DOTALL,
     )
-    with pytest.warns(FitFailedWarning, match=warning_message):
+
+    with pytest.raises(NotFittedError, match=error_message):
         cross_validate(*cross_validate_args, **cross_validate_kwargs)
 
 

From defae07fd3c931ffd23fee7543bfe1ad0d14cee8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Tue, 14 Sep 2021 16:25:27 +0200
Subject: [PATCH 2/6] Fix test error by lowering n_splits.

The number of unique groups was less than the number of splits.
---
 sklearn/model_selection/tests/test_split.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index af8338792ad73..765cf4eefa7de 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -1774,7 +1774,7 @@ def test_nested_cv():
         LeaveOneOut(),
         GroupKFold(n_splits=3),
         StratifiedKFold(),
-        StratifiedGroupKFold(),
+        StratifiedGroupKFold(n_splits=3),
         StratifiedShuffleSplit(n_splits=3, random_state=0),
     ]
 

From 94ddfead18d135ae35033bf1d14367ddbea74930 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Mon, 27 Sep 2021 11:23:21 +0200
Subject: [PATCH 3/6] Remove unused predict

---
 sklearn/model_selection/tests/test_validation.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 397fb0b5b5d0d..632b815b56bd5 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -2142,9 +2142,6 @@ def fit(self, X, y=None):
                 f"Classifier fit failed with {num_values_too_high} values too high"
             )
 
-    def predict(self, X):
-        return np.zeros(X.shape[0])
-
     def score(self, X=None, Y=None):
         return 0.0
 

From 8b81ceca0a348671ad7423edfba19c0db7b4065a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Mon, 27 Sep 2021 11:34:53 +0200
Subject: [PATCH 4/6] Add changelog and reorder alphabetically

---
 doc/whats_new/v1.1.rst | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 3aabed6214771..3fa6ba65bdbac 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -45,6 +45,21 @@ Changelog
   message when the solver does not support sparse matrices with int64 indices.
   :pr:`21093` by `Tom Dupre la Tour`_.
 
+:mod:`sklearn.model_selection`
+..............................
+
+- |Enhancement| raise an error during cross-validation when the fits for all the
+  splits failed. Similarly raise an error during grid-search when the fits for
+  all the models and all the splits failed. :pr:`20619` by :user:`Loïc Estève
+  <lesteve>`.
+
+:mod:`sklearn.pipeline`
+.......................
+
+- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`.
+  Setting a transformer to "passthrough" will pass the features unchanged.
+  :pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.
+
 :mod:`sklearn.utils`
 ....................
 
@@ -60,13 +75,6 @@ Changelog
   :pr:`20880` by :user:`Guillaume Lemaitre <glemaitre>`
   and :user:`András Simon <simonandras>`.
 
-:mod:`sklearn.pipeline`
-.......................
-
-- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`.
-  Setting a transformer to "passthrough" will pass the features unchanged.
-  :pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.
-
 Code and Documentation Contributors
 -----------------------------------
 

From 561ff89339d7b76e8bccc48e1ea8798610659017 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Mon, 27 Sep 2021 11:42:12 +0200
Subject: [PATCH 5/6] Fix PR number

---
 doc/whats_new/v1.1.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 3fa6ba65bdbac..cddca130c432b 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -50,8 +50,7 @@ Changelog
 
 - |Enhancement| raise an error during cross-validation when the fits for all the
   splits failed. Similarly raise an error during grid-search when the fits for
-  all the models and all the splits failed. :pr:`20619` by :user:`Loïc Estève
-  <lesteve>`.
+  all the models and all the splits failed. :pr:`21026` by :user:`Loïc Estève <lesteve>`.
 
 :mod:`sklearn.pipeline`
 .......................

From a63c0b47ca131cf3f785b90fa59f387392a29f13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Mon, 4 Oct 2021 10:29:59 +0200
Subject: [PATCH 6/6] Replace NotFittedError by ValueError

---
 sklearn/model_selection/_validation.py           | 4 ++--
 sklearn/model_selection/tests/test_search.py     | 5 ++---
 sklearn/model_selection/tests/test_validation.py | 3 +--
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 015ec5d43db79..2f8566d80533e 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -30,7 +30,7 @@
 from ..utils.metaestimators import _safe_split
 from ..metrics import check_scoring
 from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer
-from ..exceptions import FitFailedWarning, NotFittedError
+from ..exceptions import FitFailedWarning
 from ._split import check_cv
 from ..preprocessing import LabelEncoder
 
@@ -365,7 +365,7 @@ def _warn_or_raise_about_fit_failures(results, error_score):
                 "You can try to debug the error by setting error_score='raise'.\n\n"
                 f"Below are more details about the failures:\n{fit_errors_summary}"
             )
-            raise NotFittedError(all_fits_failed_message)
+            raise ValueError(all_fits_failed_message)
 
         else:
             some_fits_failed_message = (
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 87c92e3c8cc94..df54c5a51afb4 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -29,7 +29,6 @@
 
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.base import is_classifier
-from sklearn.exceptions import NotFittedError
 from sklearn.datasets import make_classification
 from sklearn.datasets import make_blobs
 from sklearn.datasets import make_multilabel_classification
@@ -1645,7 +1644,7 @@ def test_grid_search_classifier_all_fits_fail():
         " as required",
         flags=re.DOTALL,
     )
-    with pytest.raises(NotFittedError, match=warning_message):
+    with pytest.raises(ValueError, match=warning_message):
         gs.fit(X, y)
 
 
@@ -2175,7 +2174,7 @@ def custom_scorer(est, X, y):
         flags=re.DOTALL,
     )
 
-    with pytest.raises(NotFittedError, match=error_message):
+    with pytest.raises(ValueError, match=error_message):
         gs.fit(X, y)
 
 
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 632b815b56bd5..a66c6ae653a6f 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -11,7 +11,6 @@
 import numpy as np
 from scipy.sparse import coo_matrix, csr_matrix
 from sklearn.exceptions import FitFailedWarning
-from sklearn.exceptions import NotFittedError
 
 from sklearn.model_selection.tests.test_search import FailingClassifier
 
@@ -2190,7 +2189,7 @@ def test_cross_validate_all_failing_fits_error(error_score):
         flags=re.DOTALL,
     )
 
-    with pytest.raises(NotFittedError, match=error_message):
+    with pytest.raises(ValueError, match=error_message):
         cross_validate(*cross_validate_args, **cross_validate_kwargs)