diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 27c9641dec3de..3a2d3d33ce92e 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -830,6 +830,12 @@ Misc indices should be rejected. :issue:`11327` by :user:`Karan Dhingra <kdhingra307>` and `Joel Nothman`_. +Preprocessing + +- In :class:`preprocessing.FunctionTransformer`, the default of ``validate`` + will change from ``True`` to ``False`` in 0.22. + :issue:`10655` by :user:`Guillaume Lemaitre <glemaitre>`. + Changes to estimator checks --------------------------- diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index f2a1290685992..0c79543338212 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -42,10 +42,16 @@ class FunctionTransformer(BaseEstimator, TransformerMixin): validate : bool, optional default=True Indicate that the input X array should be checked before calling - func. If validate is false, there will be no input validation. - If it is true, then X will be converted to a 2-dimensional NumPy - array or sparse matrix. If this conversion is not possible or X - contains NaN or infinity, an exception is raised. + ``func``. The possibilities are: + + - If False, there is no input validation. + - If True, then X will be converted to a 2-dimensional NumPy array or + sparse matrix. If the conversion is not possible an exception is + raised. + + .. deprecated:: 0.20 + ``validate=True`` as default will be replaced by + ``validate=False`` in 0.22. accept_sparse : boolean, optional Indicate that func accepts a sparse matrix as input. If validate is @@ -72,7 +78,7 @@ class FunctionTransformer(BaseEstimator, TransformerMixin): Dictionary of additional keyword arguments to pass to inverse_func. 
""" - def __init__(self, func=None, inverse_func=None, validate=True, + def __init__(self, func=None, inverse_func=None, validate=None, accept_sparse=False, pass_y='deprecated', check_inverse=True, kw_args=None, inv_kw_args=None): self.func = func @@ -84,6 +90,19 @@ def __init__(self, func=None, inverse_func=None, validate=True, self.kw_args = kw_args self.inv_kw_args = inv_kw_args + def _check_input(self, X): + # FIXME: Future warning to be removed in 0.22 + if self.validate is None: + self._validate = True + warnings.warn("The default validate=True will be replaced by " + "validate=False in 0.22.", FutureWarning) + else: + self._validate = self.validate + + if self._validate: + return check_array(X, accept_sparse=self.accept_sparse) + return X + def _check_inverse_transform(self, X): """Check that func and inverse_func are the inverse.""" idx_selected = slice(None, None, max(1, X.shape[0] // 100)) @@ -111,8 +130,7 @@ def fit(self, X, y=None): ------- self """ - if self.validate: - X = check_array(X, self.accept_sparse) + X = self._check_input(X) if (self.check_inverse and not (self.func is None or self.inverse_func is None)): self._check_inverse_transform(X) @@ -165,8 +183,7 @@ def inverse_transform(self, X, y='deprecated'): kw_args=self.inv_kw_args) def _transform(self, X, y=None, func=None, kw_args=None): - if self.validate: - X = check_array(X, self.accept_sparse) + X = self._check_input(X) if func is None: func = _identity diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py index 4d166457777cc..0bd57a859649f 100644 --- a/sklearn/preprocessing/tests/test_function_transformer.py +++ b/sklearn/preprocessing/tests/test_function_transformer.py @@ -1,3 +1,4 @@ +import pytest import numpy as np from scipy import sparse @@ -145,7 +146,8 @@ def test_check_inverse(): trans = FunctionTransformer(func=np.sqrt, inverse_func=np.around, accept_sparse=accept_sparse, - check_inverse=True) + 
check_inverse=True, + validate=True) assert_warns_message(UserWarning, "The provided functions are not strictly" " inverse of each other. If you are sure you" @@ -156,15 +158,38 @@ def test_check_inverse(): trans = FunctionTransformer(func=np.expm1, inverse_func=np.log1p, accept_sparse=accept_sparse, - check_inverse=True) + check_inverse=True, + validate=True) Xt = assert_no_warnings(trans.fit_transform, X) assert_allclose_dense_sparse(X, trans.inverse_transform(Xt)) # check that we don't check inverse when one of the func or inverse is not # provided. trans = FunctionTransformer(func=np.expm1, inverse_func=None, - check_inverse=True) + check_inverse=True, validate=True) assert_no_warnings(trans.fit, X_dense) trans = FunctionTransformer(func=None, inverse_func=np.expm1, - check_inverse=True) + check_inverse=True, validate=True) assert_no_warnings(trans.fit, X_dense) + + +@pytest.mark.parametrize("validate, expected_warning", + [(None, FutureWarning), + (True, None), + (False, None)]) +def test_function_transformer_future_warning(validate, expected_warning): + # FIXME: to be removed in 0.22 + X = np.random.randn(100, 10) + transformer = FunctionTransformer(validate=validate) + with pytest.warns(expected_warning) as results: + transformer.fit_transform(X) + if expected_warning is None: + assert len(results) == 0 + + +def test_function_transformer_frame(): + pd = pytest.importorskip('pandas') + X_df = pd.DataFrame(np.random.randn(100, 10)) + transformer = FunctionTransformer(validate=False) + X_df_trans = transformer.fit_transform(X_df) + assert hasattr(X_df_trans, 'loc')