@@ -410,18 +410,24 @@ def test_imputation_constant_error_invalid_type(X_data, missing_value):
410
410
imputer .fit_transform (X )
411
411
412
412
413
+ # TODO(1.8): check that `keep_empty_features=False` drops the
414
+ # empty features due to the behaviour change.
413
415
def test_imputation_constant_integer ():
414
416
# Test imputation using the constant strategy on integers
415
417
X = np .array ([[- 1 , 2 , 3 , - 1 ], [4 , - 1 , 5 , - 1 ], [6 , 7 , - 1 , - 1 ], [8 , 9 , 0 , - 1 ]])
416
418
417
419
X_true = np .array ([[0 , 2 , 3 , 0 ], [4 , 0 , 5 , 0 ], [6 , 7 , 0 , 0 ], [8 , 9 , 0 , 0 ]])
418
420
419
- imputer = SimpleImputer (missing_values = - 1 , strategy = "constant" , fill_value = 0 )
421
+ imputer = SimpleImputer (
422
+ missing_values = - 1 , strategy = "constant" , fill_value = 0 , keep_empty_features = True
423
+ )
420
424
X_trans = imputer .fit_transform (X )
421
425
422
426
assert_array_equal (X_trans , X_true )
423
427
424
428
429
+ # TODO(1.8): check that `keep_empty_features=False` drops the
430
+ # empty features due to the behaviour change.
425
431
@pytest .mark .parametrize ("array_constructor" , CSR_CONTAINERS + [np .asarray ])
426
432
def test_imputation_constant_float (array_constructor ):
427
433
# Test imputation using the constant strategy on floats
@@ -442,12 +448,16 @@ def test_imputation_constant_float(array_constructor):
442
448
443
449
X_true = array_constructor (X_true )
444
450
445
- imputer = SimpleImputer (strategy = "constant" , fill_value = - 1 )
451
+ imputer = SimpleImputer (
452
+ strategy = "constant" , fill_value = - 1 , keep_empty_features = True
453
+ )
446
454
X_trans = imputer .fit_transform (X )
447
455
448
456
assert_allclose_dense_sparse (X_trans , X_true )
449
457
450
458
459
+ # TODO(1.8): check that `keep_empty_features=False` drops the
460
+ # empty features due to the behaviour change.
451
461
@pytest .mark .parametrize ("marker" , [None , np .nan , "NAN" , "" , 0 ])
452
462
def test_imputation_constant_object (marker ):
453
463
# Test imputation using the constant strategy on objects
@@ -472,13 +482,18 @@ def test_imputation_constant_object(marker):
472
482
)
473
483
474
484
imputer = SimpleImputer (
475
- missing_values = marker , strategy = "constant" , fill_value = "missing"
485
+ missing_values = marker ,
486
+ strategy = "constant" ,
487
+ fill_value = "missing" ,
488
+ keep_empty_features = True ,
476
489
)
477
490
X_trans = imputer .fit_transform (X )
478
491
479
492
assert_array_equal (X_trans , X_true )
480
493
481
494
495
+ # TODO(1.8): check that `keep_empty_features=False` drops the
496
+ # empty features due to the behaviour change.
482
497
@pytest .mark .parametrize ("dtype" , [object , "category" ])
483
498
def test_imputation_constant_pandas (dtype ):
484
499
# Test imputation using the constant strategy on pandas df
@@ -498,7 +513,7 @@ def test_imputation_constant_pandas(dtype):
498
513
dtype = object ,
499
514
)
500
515
501
- imputer = SimpleImputer (strategy = "constant" )
516
+ imputer = SimpleImputer (strategy = "constant" , keep_empty_features = True )
502
517
X_trans = imputer .fit_transform (df )
503
518
504
519
assert_array_equal (X_trans , X_true )
@@ -1514,6 +1529,26 @@ def test_most_frequent(expected, array, dtype, extra_value, n_repeat):
1514
1529
)
1515
1530
1516
1531
1532
+ @pytest .mark .parametrize (
1533
+ "initial_strategy" , ["mean" , "median" , "most_frequent" , "constant" ]
1534
+ )
1535
+ def test_iterative_imputer_keep_empty_features (initial_strategy ):
1536
+ """Check the behaviour of the iterative imputer with different initial strategies
1537
+ and keeping empty features (i.e. features containing only missing values).
1538
+ """
1539
+ X = np .array ([[1 , np .nan , 2 ], [3 , np .nan , np .nan ]])
1540
+
1541
+ imputer = IterativeImputer (
1542
+ initial_strategy = initial_strategy , keep_empty_features = True
1543
+ )
1544
+ X_imputed = imputer .fit_transform (X )
1545
+ assert_allclose (X_imputed [:, 1 ], 0 )
1546
+ X_imputed = imputer .transform (X )
1547
+ assert_allclose (X_imputed [:, 1 ], 0 )
1548
+
1549
+
1550
+ # TODO(1.8): check that `keep_empty_features=False` drops the
1551
+ # empty features due to the behaviour change.
1517
1552
def test_iterative_imputer_constant_fill_value ():
1518
1553
"""Check that we propagate properly the parameter `fill_value`."""
1519
1554
X = np .array ([[- 1 , 2 , 3 , - 1 ], [4 , - 1 , 5 , - 1 ], [6 , 7 , - 1 , - 1 ], [8 , 9 , 0 , - 1 ]])
@@ -1524,6 +1559,7 @@ def test_iterative_imputer_constant_fill_value():
1524
1559
initial_strategy = "constant" ,
1525
1560
fill_value = fill_value ,
1526
1561
max_iter = 0 ,
1562
+ keep_empty_features = True ,
1527
1563
)
1528
1564
imputer .fit_transform (X )
1529
1565
assert_array_equal (imputer .initial_imputer_ .statistics_ , fill_value )
@@ -1722,7 +1758,13 @@ def test_simple_imputer_constant_keep_empty_features(array_type, keep_empty_feat
1722
1758
)
1723
1759
1724
1760
for method in ["fit_transform" , "transform" ]:
1725
- X_imputed = getattr (imputer , method )(X )
1761
+ # TODO(1.8): Remove the condition and always call getattr(imputer, method)(X)
1762
+ if method .startswith ("fit" ) and not keep_empty_features :
1763
+ warn_msg = '`strategy="constant"`, empty features are not dropped. '
1764
+ with pytest .warns (FutureWarning , match = warn_msg ):
1765
+ X_imputed = getattr (imputer , method )(X )
1766
+ else :
1767
+ X_imputed = getattr (imputer , method )(X )
1726
1768
assert X_imputed .shape == X .shape
1727
1769
constant_feature = (
1728
1770
X_imputed [:, 0 ].toarray () if array_type == "sparse" else X_imputed [:, 0 ]
0 commit comments