From 69c880b6cc69f2fc14458feb5a465c8c1b594ce6 Mon Sep 17 00:00:00 2001 From: sergeyf Date: Wed, 23 Jan 2019 14:05:31 -0800 Subject: [PATCH 1/6] changing default for iterativeimputer --- sklearn/impute.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sklearn/impute.py b/sklearn/impute.py index f888d7fe83d4f..ce4452565e950 100644 --- a/sklearn/impute.py +++ b/sklearn/impute.py @@ -868,13 +868,8 @@ def fit_transform(self, X, y=None): .format(self.n_iter)) if self.predictor is None: - if self.sample_posterior: - from .linear_model import BayesianRidge - self._predictor = BayesianRidge() - else: - from .linear_model import RidgeCV - # including a very small alpha to approximate OLS - self._predictor = RidgeCV(alphas=np.array([1e-5, 0.1, 1, 10])) + from .linear_model import BayesianRidge + self._predictor = BayesianRidge() else: self._predictor = clone(self.predictor) From d417047864eb0293ee00a743ea66087f837386d0 Mon Sep 17 00:00:00 2001 From: sergeyf Date: Wed, 23 Jan 2019 14:32:26 -0800 Subject: [PATCH 2/6] also changing impute.rst to pass tests --- doc/modules/impute.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst index 3b029c4d15751..c908b8afc4163 100644 --- a/doc/modules/impute.rst +++ b/doc/modules/impute.rst @@ -106,7 +106,7 @@ round are returned. >>> import numpy as np >>> from sklearn.impute import IterativeImputer >>> imp = IterativeImputer(n_iter=10, random_state=0) - >>> imp.fit([[1, 2], [np.nan, 3], [7, np.nan]]) # doctest: +NORMALIZE_WHITESPACE + >>> imp.fit([[1, 2], [3, 6], [4, 8], [np.nan, 3], [7, np.nan]]) # doctest: +NORMALIZE_WHITESPACE IterativeImputer(imputation_order='ascending', initial_strategy='mean', max_value=None, min_value=None, missing_values=nan, n_iter=10, n_nearest_features=None, predictor=None, random_state=0, @@ -114,8 +114,8 @@ round are returned. >>> X_test = [[np.nan, 2], [6, np.nan], [np.nan, 6]] >>> print(np.round(imp.transform(X_test))) [[ 1. 2.] - [ 6. 3.] - [26. 6.]] + [ 6. 12.] + [ 3. 6.]] Both :class:`SimpleImputer` and :class:`IterativeImputer` can be used in a Pipeline as a way to build a composite estimator that supports imputation. From 55ff0d8b2820bda3dbdb22ec7bcf73dec51b98a8 Mon Sep 17 00:00:00 2001 From: sergeyf Date: Wed, 23 Jan 2019 14:59:04 -0800 Subject: [PATCH 3/6] updating documentation --- sklearn/impute.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklearn/impute.py b/sklearn/impute.py index ce4452565e950..6dfce49f7b1f2 100644 --- a/sklearn/impute.py +++ b/sklearn/impute.py @@ -455,13 +455,10 @@ class IterativeImputer(BaseEstimator, TransformerMixin): computed during the final round. A round is a single imputation of each feature with missing values. - predictor : estimator object, default=RidgeCV() or BayesianRidge() + predictor : estimator object, default=BayesianRidge() The predictor to use at each step of the round-robin imputation. If ``sample_posterior`` is True, the predictor must support - ``return_std`` in its ``predict`` method. Also, if - ``sample_posterior=True`` the default predictor will be - :class:`sklearn.linear_model.BayesianRidge` and - :class:`sklearn.linear_model.RidgeCV` otherwise. + ``return_std`` in its ``predict`` method. sample_posterior : boolean, default=False Whether to sample from the (Gaussian) predictive posterior of the From 9ee731cb922d3beefdc842c5d131f8eacbe0a5a3 Mon Sep 17 00:00:00 2001 From: sergeyf Date: Wed, 23 Jan 2019 21:39:50 -0800 Subject: [PATCH 4/6] adding regression test --- sklearn/tests/test_impute.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sklearn/tests/test_impute.py b/sklearn/tests/test_impute.py index 3f347edd00e3e..5400704a7de84 100644 --- a/sklearn/tests/test_impute.py +++ b/sklearn/tests/test_impute.py @@ -572,6 +572,24 @@ def test_iterative_imputer_predictors(predictor): assert len(set(hashes)) == len(hashes) +def test_iterative_imputer_bayesianridge_default(): + rng = np.random.RandomState(0) + + n = 100 + d = 10 + X = sparse_random_matrix(n, d, density=0.10, random_state=rng).toarray() + + imputer = IterativeImputer(missing_values=0, + n_iter=1, + predictor=None, + random_state=rng) + imputer.fit_transform(X) + + # check that types are correct for predictors + for triplet in imputer.imputation_sequence_: + assert isinstance(triplet.predictor, type(BayesianRidge())) + + def test_iterative_imputer_clip(): rng = np.random.RandomState(0) n = 100 From a73e6484ba946802ae0cd0264228de407f201e93 Mon Sep 17 00:00:00 2001 From: sergeyf Date: Thu, 24 Jan 2019 07:38:14 -0800 Subject: [PATCH 5/6] simplifying test --- sklearn/tests/test_impute.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/sklearn/tests/test_impute.py b/sklearn/tests/test_impute.py index 5400704a7de84..62e2f2972c52c 100644 --- a/sklearn/tests/test_impute.py +++ b/sklearn/tests/test_impute.py @@ -547,7 +547,7 @@ def test_iterative_imputer_imputation_order(imputation_order): @pytest.mark.parametrize( "predictor", - [DummyRegressor(), BayesianRidge(), ARDRegression(), RidgeCV()] + [None, DummyRegressor(), BayesianRidge(), ARDRegression(), RidgeCV()] ) def test_iterative_imputer_predictors(predictor): rng = np.random.RandomState(0) @@ -565,31 +565,15 @@ def test_iterative_imputer_predictors(predictor): # check that types are correct for predictors hashes = [] for triplet in imputer.imputation_sequence_: - assert isinstance(triplet.predictor, type(predictor)) + expected_type = type(predictor) if predictor is not None \ + else type(BayesianRidge()) + assert isinstance(triplet.predictor, expected_type) hashes.append(id(triplet.predictor)) # check that each predictor is unique assert len(set(hashes)) == len(hashes) -def test_iterative_imputer_bayesianridge_default(): - rng = np.random.RandomState(0) - - n = 100 - d = 10 - X = sparse_random_matrix(n, d, density=0.10, random_state=rng).toarray() - - imputer = IterativeImputer(missing_values=0, - n_iter=1, - predictor=None, - random_state=rng) - imputer.fit_transform(X) - - # check that types are correct for predictors - for triplet in imputer.imputation_sequence_: - assert isinstance(triplet.predictor, type(BayesianRidge())) - - def test_iterative_imputer_clip(): rng = np.random.RandomState(0) n = 100 From 59260252824c11079dd3f0e417529f368d40b757 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 24 Jan 2019 16:47:09 +0100 Subject: [PATCH 6/6] cosmetic change --- sklearn/tests/test_impute.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_impute.py b/sklearn/tests/test_impute.py index 62e2f2972c52c..a2bf8d75ef9e5 100644 --- a/sklearn/tests/test_impute.py +++ b/sklearn/tests/test_impute.py @@ -565,8 +565,8 @@ def test_iterative_imputer_predictors(predictor): # check that types are correct for predictors hashes = [] for triplet in imputer.imputation_sequence_: - expected_type = type(predictor) if predictor is not None \ - else type(BayesianRidge()) + expected_type = (type(predictor) if predictor is not None + else type(BayesianRidge())) assert isinstance(triplet.predictor, expected_type) hashes.append(id(triplet.predictor))