FIX Allow input validation bypass in MLPClassifier by betatim · Pull Request #24873 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

FIX Allow input validation bypass in MLPClassifier #24873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 4, 2023
6 changes: 6 additions & 0 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ Changelog
when the provided `sample_weight` reduces the problem to a single class in `fit`.
:pr:`24140` by :user:`Jonathan Ohayon <Johayon>` and :user:`Chiara Marmo <cmarmo>`.

:mod:`sklearn.neural_network`
.............................
- |Fix| :class:`neural_network.MLPClassifier` and :class:`neural_network.MLPRegressor`
no longer raise warnings when fitting data with feature names.
:pr:`24873` by :user:`Tim Head <betatim>`.

:mod:`sklearn.pipeline`
.......................
- |Feature| :class:`pipeline.FeatureUnion` can now use indexing notation (e.g.
Expand Down
34 changes: 29 additions & 5 deletions sklearn/neural_network/_multilayer_perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from ..base import is_classifier
from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS
from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer
from ..metrics import accuracy_score, r2_score
from ..model_selection import train_test_split
from ..preprocessing import LabelBinarizer
from ..utils import gen_batches, check_random_state
Expand Down Expand Up @@ -178,7 +179,7 @@ def _forward_pass(self, activations):

return activations

def _forward_pass_fast(self, X):
def _forward_pass_fast(self, X, check_input=True):
"""Predict using the trained model

This is the same as _forward_pass but does not record the activations
Expand All @@ -189,12 +190,16 @@ def _forward_pass_fast(self, X):
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The input data.

check_input : bool, default=True
Perform input data validation or not.

Returns
-------
y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)
The decision function of the samples for each class in the model.
"""
X = self._validate_data(X, accept_sparse=["csr", "csc"], reset=False)
if check_input:
X = self._validate_data(X, accept_sparse=["csr", "csc"], reset=False)

# Initialize first layer
activation = X
Expand Down Expand Up @@ -694,7 +699,7 @@ def _fit_stochastic(
def _update_no_improvement_count(self, early_stopping, X_val, y_val):
if early_stopping:
# compute validation score, use that for stopping
self.validation_scores_.append(self.score(X_val, y_val))
self.validation_scores_.append(self._score(X_val, y_val))

if self.verbose:
print("Validation score: %f" % self.validation_scores_[-1])
Expand Down Expand Up @@ -1146,13 +1151,22 @@ def predict(self, X):
The predicted classes.
"""
check_is_fitted(self)
y_pred = self._forward_pass_fast(X)
return self._predict(X)

def _predict(self, X, check_input=True):
    """Predict class labels for ``X``, with optional input validation.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input data.

    check_input : bool, default=True
        Whether the forward pass should validate ``X`` first.

    Returns
    -------
    y : ndarray
        The predicted classes.
    """
    raw_output = self._forward_pass_fast(X, check_input=check_input)

    # A single network output column is flattened to 1-D before being
    # mapped back to class labels by the label binarizer.
    if self.n_outputs_ == 1:
        raw_output = raw_output.ravel()

    return self._label_binarizer.inverse_transform(raw_output)

def _score(self, X, y):
"""Private score method without input validation"""
# Input validation would remove feature names, so we disable it
return accuracy_score(y, self._predict(X, check_input=False))

@available_if(lambda est: est._check_solver())
def partial_fit(self, X, y, classes=None):
"""Update the model with a single iteration over the given data.
Expand Down Expand Up @@ -1574,11 +1588,21 @@ def predict(self, X):
The predicted values.
"""
check_is_fitted(self)
y_pred = self._forward_pass_fast(X)
return self._predict(X)

def _predict(self, X, check_input=True):
    """Predict target values for ``X``, with optional input validation.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input data.

    check_input : bool, default=True
        Whether the forward pass should validate ``X`` first.

    Returns
    -------
    y : ndarray
        The predicted values.
    """
    outputs = self._forward_pass_fast(X, check_input=check_input)
    # Flatten a single-output prediction so its shape matches ``y``.
    if outputs.shape[1] == 1:
        return outputs.ravel()
    return outputs

def _score(self, X, y):
    """R^2 score of the regressor on ``(X, y)``, skipping input validation.

    Validation would strip feature names (e.g. from a pandas DataFrame),
    so it is deliberately bypassed here.
    """
    return r2_score(y, self._predict(X, check_input=False))

def _validate_input(self, X, y, incremental, reset):
X, y = self._validate_data(
X,
Expand Down
21 changes: 21 additions & 0 deletions sklearn/neural_network/tests/test_mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,27 @@ def test_mlp_loading_from_joblib_partial_fit(tmp_path):
assert_allclose(predicted_value, fine_tune_target, rtol=1e-4)


@pytest.mark.parametrize("Estimator", [MLPClassifier, MLPRegressor])
def test_preserve_feature_names(Estimator):
    """Check that feature names are preserved when early stopping is enabled.

    Feature names are required for consistency checks during scoring.

    Non-regression test for gh-24846
    """
    pd = pytest.importorskip("pandas")
    random_state = np.random.RandomState(0)

    X = pd.DataFrame(
        data=random_state.randn(10, 2), columns=["colname_a", "colname_b"]
    )
    y = pd.Series(data=np.full(10, 1), name="colname_y")

    model = Estimator(early_stopping=True, validation_fraction=0.2)

    # Escalate any UserWarning (e.g. "X does not have valid feature names")
    # raised during fitting into a test failure.
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        model.fit(X, y)


@pytest.mark.parametrize("MLPEstimator", [MLPClassifier, MLPRegressor])
def test_mlp_warm_start_with_early_stopping(MLPEstimator):
"""Check that early stopping works with warm start."""
Expand Down
0