8
8
9
9
import io
10
10
11
+ from sklearn .cross_decomposition import PLSRegression
11
12
from sklearn .utils ._testing import assert_allclose
12
13
from sklearn .utils ._testing import assert_allclose_dense_sparse
13
14
from sklearn .utils ._testing import assert_array_equal
16
17
# make IterativeImputer available
17
18
from sklearn .experimental import enable_iterative_imputer # noqa
18
19
19
- from sklearn .datasets import load_diabetes
20
+ from sklearn .datasets import load_diabetes , fetch_california_housing
20
21
from sklearn .impute import MissingIndicator
21
22
from sklearn .impute import SimpleImputer , IterativeImputer
22
23
from sklearn .dummy import DummyRegressor
@@ -633,7 +634,7 @@ def test_iterative_imputer_imputation_order(imputation_order):
633
634
634
635
@pytest .mark .parametrize (
635
636
"estimator" ,
636
- [None , DummyRegressor (), BayesianRidge (), ARDRegression (), RidgeCV ()]
637
+ [None , DummyRegressor (), BayesianRidge (), ARDRegression (), RidgeCV (), PLSRegression () ]
637
638
)
638
639
def test_iterative_imputer_estimators (estimator ):
639
640
rng = np .random .RandomState (0 )
@@ -660,6 +661,33 @@ def test_iterative_imputer_estimators(estimator):
660
661
assert len (set (hashes )) == len (hashes )
661
662
662
663
664
def test_iterative_imputer_multiple_components():
    """Check IterativeImputer accepts estimators whose ``predict`` returns
    a 2-D array.

    Non-regression test: ``PLSRegression.predict`` returns an array of
    shape ``(n_samples, 1)`` as opposed to the 1-D array returned by most
    regressors; the imputer must handle this without error.
    """
    rng = np.random.RandomState(42)

    # Use the bundled diabetes dataset rather than fetch_california_housing:
    # fetching downloads data over the network, which makes a unit test slow
    # and flaky in offline CI environments. The imputer code path exercised
    # is the same.
    X_full, _ = load_diabetes(return_X_y=True)
    n_samples, n_features = X_full.shape

    # Add missing values in 75% of the rows, one feature per affected row.
    missing_rate = 0.75
    n_missing_samples = int(n_samples * missing_rate)
    missing_samples = np.zeros(n_samples, dtype=bool)
    missing_samples[:n_missing_samples] = True
    rng.shuffle(missing_samples)

    missing_features = rng.randint(0, n_features, n_missing_samples)
    X_missing = X_full.copy()
    X_missing[missing_samples, missing_features] = np.nan

    # PLSRegression returns a multi-dimensional numpy array as opposed to
    # other estimators which return a 1-D array, but this should not cause
    # issues in the imputer.
    imputer = IterativeImputer(estimator=PLSRegression(n_components=2))
    X_imputed = imputer.fit_transform(X_missing)

    assert X_imputed.shape == X_full.shape
    # Every injected NaN must actually have been imputed.
    assert not np.isnan(X_imputed).any()
663
691
def test_iterative_imputer_clip ():
664
692
rng = np .random .RandomState (0 )
665
693
n = 100
0 commit comments