diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index f52639034f675..c91540add698d 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -86,6 +86,12 @@ Changelog when the provided `sample_weight` reduces the problem to a single class in `fit`. :pr:`24140` by :user:`Jonathan Ohayon <Johayon>` and :user:`Chiara Marmo <cmarmo>`. +:mod:`sklearn.neural_network` +............................. +- |Fix| :class:`neural_network.MLPClassifier` and :class:`neural_network.MLPRegressor` + no longer raise warnings when fitting data with feature names. + :pr:`24873` by :user:`Tim Head <betatim>`. + :mod:`sklearn.pipeline` ....................... - |Feature| :class:`pipeline.FeatureUnion` can now use indexing notation (e.g. diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index 082c0200871cd..7ed0ab33a0f29 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -23,6 +23,7 @@ from ..base import is_classifier from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer +from ..metrics import accuracy_score, r2_score from ..model_selection import train_test_split from ..preprocessing import LabelBinarizer from ..utils import gen_batches, check_random_state @@ -178,7 +179,7 @@ def _forward_pass(self, activations): return activations - def _forward_pass_fast(self, X): + def _forward_pass_fast(self, X, check_input=True): """Predict using the trained model This is the same as _forward_pass but does not record the activations @@ -189,12 +190,16 @@ def _forward_pass_fast(self, X): X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. + check_input : bool, default=True + Perform input data validation or not. + Returns ------- y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs) The decision function of the samples for each class in the model. 
""" - X = self._validate_data(X, accept_sparse=["csr", "csc"], reset=False) + if check_input: + X = self._validate_data(X, accept_sparse=["csr", "csc"], reset=False) # Initialize first layer activation = X @@ -694,7 +699,7 @@ def _fit_stochastic( def _update_no_improvement_count(self, early_stopping, X_val, y_val): if early_stopping: # compute validation score, use that for stopping - self.validation_scores_.append(self.score(X_val, y_val)) + self.validation_scores_.append(self._score(X_val, y_val)) if self.verbose: print("Validation score: %f" % self.validation_scores_[-1]) @@ -1146,13 +1151,22 @@ def predict(self, X): The predicted classes. """ check_is_fitted(self) - y_pred = self._forward_pass_fast(X) + return self._predict(X) + + def _predict(self, X, check_input=True): + """Private predict method with optional input validation""" + y_pred = self._forward_pass_fast(X, check_input=check_input) if self.n_outputs_ == 1: y_pred = y_pred.ravel() return self._label_binarizer.inverse_transform(y_pred) + def _score(self, X, y): + """Private score method without input validation""" + # Input validation would remove feature names, so we disable it + return accuracy_score(y, self._predict(X, check_input=False)) + @available_if(lambda est: est._check_solver()) def partial_fit(self, X, y, classes=None): """Update the model with a single iteration over the given data. @@ -1574,11 +1588,21 @@ def predict(self, X): The predicted values. 
""" check_is_fitted(self) - y_pred = self._forward_pass_fast(X) + return self._predict(X) + + def _predict(self, X, check_input=True): + """Private predict method with optional input validation""" + y_pred = self._forward_pass_fast(X, check_input=check_input) if y_pred.shape[1] == 1: return y_pred.ravel() return y_pred + def _score(self, X, y): + """Private score method without input validation""" + # Input validation would remove feature names, so we disable it + y_pred = self._predict(X, check_input=False) + return r2_score(y, y_pred) + def _validate_input(self, X, y, incremental, reset): X, y = self._validate_data( X, diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 94612130419f7..a4d4831766170 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -894,6 +894,27 @@ def test_mlp_loading_from_joblib_partial_fit(tmp_path): assert_allclose(predicted_value, fine_tune_target, rtol=1e-4) +@pytest.mark.parametrize("Estimator", [MLPClassifier, MLPRegressor]) +def test_preserve_feature_names(Estimator): + """Check that feature names are preserved when early stopping is enabled. + + Feature names are required for consistency checks during scoring. + + Non-regression test for gh-24846 + """ + pd = pytest.importorskip("pandas") + rng = np.random.RandomState(0) + + X = pd.DataFrame(data=rng.randn(10, 2), columns=["colname_a", "colname_b"]) + y = pd.Series(data=np.full(10, 1), name="colname_y") + + model = Estimator(early_stopping=True, validation_fraction=0.2) + + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + model.fit(X, y) + + @pytest.mark.parametrize("MLPEstimator", [MLPClassifier, MLPRegressor]) def test_mlp_warm_start_with_early_stopping(MLPEstimator): """Check that early stopping works with warm start."""