MAINT test globally setting output via context manager by glemaitre · Pull Request #24932 · scikit-learn/scikit-learn · GitHub

MAINT test globally setting output via context manager #24932


Merged · 15 commits · Nov 24, 2022
59 changes: 52 additions & 7 deletions sklearn/impute/_iterative.py
@@ -9,7 +9,13 @@
 from ..base import clone
 from ..exceptions import ConvergenceWarning
 from ..preprocessing import normalize
-from ..utils import check_array, check_random_state, _safe_indexing, is_scalar_nan
+from ..utils import (
+    check_array,
+    check_random_state,
+    is_scalar_nan,
+    _safe_assign,
+    _safe_indexing,
+)
 from ..utils.validation import FLOAT_DTYPES, check_is_fitted
 from ..utils.validation import _check_feature_names_in
 from ..utils._mask import _get_mask
@@ -25,6 +31,26 @@
 )


+def _assign_where(X1, X2, cond):
+    """Assign X2 to X1 where cond is True.
+
+    Parameters
+    ----------
+    X1 : ndarray or dataframe of shape (n_samples, n_features)
+        Data.
+
+    X2 : ndarray of shape (n_samples, n_features)
+        Data to be assigned.
+
+    cond : ndarray of shape (n_samples, n_features)
+        Boolean mask to assign data.
+    """
+    if hasattr(X1, "mask"):  # pandas dataframes
+        X1.mask(cond=cond, other=X2, inplace=True)
+    else:  # ndarrays
+        X1[cond] = X2[cond]
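Note: `DataFrame.mask(cond, other)` replaces the entries where `cond` is True with the matching entries of `other`, so both branches implement the same semantics. A minimal sketch of the helper's behaviour (not part of the diff, assumes pandas is installed):

import numpy as np
import pandas as pd

X1 = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
X2 = np.array([[10.0, 30.0], [20.0, 40.0]])
cond = np.array([[True, False], [False, True]])

# equivalent to the ndarray branch `X1[cond] = X2[cond]`
X1.mask(cond=cond, other=X2, inplace=True)
print(X1)  # column "a" becomes [10.0, 2.0], column "b" becomes [3.0, 40.0]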


class IterativeImputer(_BaseImputer):
"""Multivariate imputer that estimates each feature from all the others.

@@ -362,16 +388,28 @@ def _impute_one_feature(

         missing_row_mask = mask_missing_values[:, feat_idx]
         if fit_mode:
-            X_train = _safe_indexing(X_filled[:, neighbor_feat_idx], ~missing_row_mask)
-            y_train = _safe_indexing(X_filled[:, feat_idx], ~missing_row_mask)
+            X_train = _safe_indexing(
+                _safe_indexing(X_filled, neighbor_feat_idx, axis=1),
+                ~missing_row_mask,
+                axis=0,
+            )
+            y_train = _safe_indexing(
+                _safe_indexing(X_filled, feat_idx, axis=1),
+                ~missing_row_mask,
+                axis=0,
+            )
             estimator.fit(X_train, y_train)

         # if no missing values, don't predict
         if np.sum(missing_row_mask) == 0:
             return X_filled, estimator

         # get posterior samples if there is at least one missing value
-        X_test = _safe_indexing(X_filled[:, neighbor_feat_idx], missing_row_mask)
+        X_test = _safe_indexing(
+            _safe_indexing(X_filled, neighbor_feat_idx, axis=1),
+            missing_row_mask,
+            axis=0,
+        )
         if self.sample_posterior:
             mus, sigmas = estimator.predict(X_test, return_std=True)
             imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)
@@ -402,7 +440,12 @@ def _impute_one_feature(
             )

         # update the feature
-        X_filled[missing_row_mask, feat_idx] = imputed_values
+        _safe_assign(
+            X_filled,
+            imputed_values,
+            row_indexer=missing_row_mask,
+            column_indexer=feat_idx,
+        )
         return X_filled, estimator
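For context, the nested `_safe_indexing` calls slice columns first (`axis=1`) and then rows (`axis=0`), so the same code path now works for ndarrays and dataframes alike. A rough standalone illustration with toy data, variable names mirroring the hunk above (`_safe_indexing` is a private scikit-learn helper):

import numpy as np
from sklearn.utils import _safe_indexing

X_filled = np.arange(12, dtype=float).reshape(4, 3)
neighbor_feat_idx = [0, 2]
missing_row_mask = np.array([False, True, False, True])

# columns first, then rows, as in the fit_mode branch
X_train = _safe_indexing(
    _safe_indexing(X_filled, neighbor_feat_idx, axis=1),
    ~missing_row_mask,
    axis=0,
)
assert X_train.shape == (2, 2)  # 2 non-missing rows x 2 neighbor features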

def _get_neighbor_feat_idx(self, n_features, feat_idx, abs_corr_mat):
@@ -743,7 +786,8 @@ def fit_transform(self, X, y=None):
"[IterativeImputer] Early stopping criterion not reached.",
ConvergenceWarning,
)
Xt[~mask_missing_values] = X[~mask_missing_values]
_assign_where(Xt, X, cond=~mask_missing_values)

return super()._concatenate_indicator(Xt, X_indicator)

def transform(self, X):
@@ -796,7 +840,8 @@ def transform(self, X):
                 )
             i_rnd += 1

-        Xt[~mask_missing_values] = X[~mask_missing_values]
+        _assign_where(Xt, X, cond=~mask_missing_values)
+
         return super()._concatenate_indicator(Xt, X_indicator)

def fit(self, X, y=None):
22 changes: 21 additions & 1 deletion sklearn/impute/tests/test_base.py
@@ -2,8 +2,11 @@

 import numpy as np

-from sklearn.impute._base import _BaseImputer
 from sklearn.utils._mask import _get_mask
+from sklearn.utils._testing import _convert_container, assert_allclose
+
+from sklearn.impute._base import _BaseImputer
+from sklearn.impute._iterative import _assign_where


@pytest.fixture
@@ -87,3 +90,20 @@ def test_base_no_precomputed_mask_transform(data):
     imputer.transform(data)
     with pytest.raises(ValueError, match=err_msg):
         imputer.fit_transform(data)
+
+
+@pytest.mark.parametrize("X1_type", ["array", "dataframe"])
+def test_assign_where(X1_type):
+    """Check the behaviour of the private helper `_assign_where`."""
+    rng = np.random.RandomState(0)
+
+    n_samples, n_features = 10, 5
+    X1 = _convert_container(rng.randn(n_samples, n_features), constructor_name=X1_type)
+    X2 = rng.randn(n_samples, n_features)
+    mask = rng.randint(0, 2, size=(n_samples, n_features)).astype(bool)
+
+    _assign_where(X1, X2, mask)
+
+    if X1_type == "dataframe":
+        X1 = X1.to_numpy()
+    assert_allclose(X1[mask], X2[mask])
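The test builds either container from the same data via the `_convert_container` testing utility; roughly (a sketch, not from the diff):

import numpy as np
from sklearn.utils._testing import _convert_container

data = np.arange(6, dtype=float).reshape(3, 2)
as_array = _convert_container(data, constructor_name="array")      # plain ndarray
as_frame = _convert_container(data, constructor_name="dataframe")  # pandas DataFrame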
6 changes: 2 additions & 4 deletions sklearn/inspection/_partial_dependence.py
@@ -16,6 +16,7 @@
 from ..utils import check_array
 from ..utils import check_matplotlib_support  # noqa
 from ..utils import _safe_indexing
+from ..utils import _safe_assign
 from ..utils import _determine_key_type
 from ..utils import _get_column_indices
 from ..utils.validation import check_is_fitted
@@ -149,10 +150,7 @@ def _partial_dependence_brute(est, grid, features, X, response_method):
     X_eval = X.copy()
     for new_values in grid:
         for i, variable in enumerate(features):
-            if hasattr(X_eval, "iloc"):
-                X_eval.iloc[:, variable] = new_values[i]
-            else:
-                X_eval[:, variable] = new_values[i]
+            _safe_assign(X_eval, new_values[i], column_indexer=variable)

         try:
             # Note: predictions is of shape
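Here `_safe_assign` with only `column_indexer` overwrites a whole column, folding the previous `iloc`-vs-ndarray branching into one call. A hedged sketch of the equivalent behaviour on both container types (toy data, illustrative names):

import numpy as np
import pandas as pd
from sklearn.utils import _safe_assign

X_np = np.zeros((3, 2))
X_df = pd.DataFrame(np.zeros((3, 2)), columns=["f0", "f1"])

# both containers end up with column 0 set to 1.0
_safe_assign(X_np, 1.0, column_indexer=0)
_safe_assign(X_df, 1.0, column_indexer=0)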
11 changes: 2 additions & 9 deletions sklearn/preprocessing/_encoders.py
@@ -10,7 +10,7 @@
 from scipy import sparse

 from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from ..utils import check_array, is_scalar_nan
+from ..utils import check_array, is_scalar_nan, _safe_indexing
 from ..utils.validation import check_is_fitted
 from ..utils.validation import _check_feature_names_in
 from ..utils._param_validation import Interval, StrOptions, Hidden
@@ -58,21 +58,14 @@ def _check_X(self, X, force_all_finite=True):
         X_columns = []

         for i in range(n_features):
-            Xi = self._get_feature(X, feature_idx=i)
+            Xi = _safe_indexing(X, indices=i, axis=1)
             Xi = check_array(
                 Xi, ensure_2d=False, dtype=None, force_all_finite=needs_validation
             )
             X_columns.append(Xi)

         return X_columns, n_samples, n_features

-    def _get_feature(self, X, feature_idx):
-        if hasattr(X, "iloc"):
-            # pandas dataframes
-            return X.iloc[:, feature_idx]
-        # numpy arrays, sparse arrays
-        return X[:, feature_idx]
-
     def _fit(
         self, X, handle_unknown="error", force_all_finite=True, return_counts=False
     ):
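With a scalar index and `axis=1`, `_safe_indexing` returns a single column (a Series for dataframes, a 1-D ndarray otherwise), which is exactly what the removed `_get_feature` hand-rolled. For illustration (made-up data):

import pandas as pd
from sklearn.utils import _safe_indexing

X = pd.DataFrame({"color": ["red", "blue"], "size": ["S", "M"]})
col_df = _safe_indexing(X, 0, axis=1)             # pandas Series "color"
col_np = _safe_indexing(X.to_numpy(), 0, axis=1)  # 1-D ndarray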
16 changes: 16 additions & 0 deletions sklearn/tests/test_common.py
@@ -79,6 +79,7 @@
     check_transformer_get_feature_names_out_pandas,
     check_set_output_transform,
     check_set_output_transform_pandas,
+    check_global_ouptut_transform_pandas,
 )


@@ -544,3 +545,18 @@ def test_set_output_transform_pandas(estimator):
     _set_checking_parameters(estimator)
     with ignore_warnings(category=(FutureWarning)):
         check_set_output_transform_pandas(estimator.__class__.__name__, estimator)
+
+
+@pytest.mark.parametrize(
+    "estimator", SET_OUTPUT_ESTIMATORS, ids=_get_check_estimator_ids
+)
+def test_global_output_transform_pandas(estimator):
+    name = estimator.__class__.__name__
+    if not hasattr(estimator, "set_output"):
+        pytest.skip(
+            f"Skipping check_global_ouptut_transform_pandas for {name}: Does not"
+            " support set_output API yet"
+        )
+    _set_checking_parameters(estimator)
+    with ignore_warnings(category=(FutureWarning)):
+        check_global_ouptut_transform_pandas(estimator.__class__.__name__, estimator)
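This new common check exercises the global counterpart of the per-estimator `set_output` API: configuring pandas output for all transformers at once via the context manager. In user code (public API as of scikit-learn 1.2) this looks roughly like:

import sklearn
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

X, _ = load_iris(return_X_y=True, as_frame=True)

# inside the context, transform() returns a pandas DataFrame
with sklearn.config_context(transform_output="pandas"):
    X_scaled = StandardScaler().fit_transform(X)

print(type(X_scaled))  # <class 'pandas.core.frame.DataFrame'>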
38 changes: 32 additions & 6 deletions sklearn/utils/__init__.py
@@ -35,6 +35,7 @@
     indexable,
     check_symmetric,
     check_scalar,
+    _is_arraylike_not_scalar,
 )
from .. import get_config
from ._bunch import Bunch
@@ -186,13 +187,8 @@ def _array_indexing(array, key, key_dtype, axis):

 def _pandas_indexing(X, key, key_dtype, axis):
     """Index a pandas dataframe or a series."""
-    if hasattr(key, "shape"):
-        # Work-around for indexing with read-only key in pandas
-        # FIXME: solved in pandas 0.25
+    if _is_arraylike_not_scalar(key):
         key = np.asarray(key)
-        key = key if key.flags.writeable else key.copy()
-    elif isinstance(key, tuple):
-        key = list(key)

     if key_dtype == "int" and not (isinstance(key, slice) or np.isscalar(key)):
         # using take() instead of iloc[] ensures the return value is a "proper"
@@ -362,6 +358,36 @@ def _safe_indexing(X, indices, *, axis=0):
         return _list_indexing(X, indices, indices_dtype)


+def _safe_assign(X, values, *, row_indexer=None, column_indexer=None):
+    """Safe assignment to a numpy array, sparse matrix, or pandas dataframe.
+
+    Parameters
+    ----------
+    X : {ndarray, sparse-matrix, dataframe}
+        Array to be modified. It is expected to be 2-dimensional.
+
+    values : ndarray
+        The values to be assigned to `X`.
+
+    row_indexer : array-like, dtype={int, bool}, default=None
+        A 1-dimensional array to select the rows of interest. If `None`, all
+        rows are selected.
+
+    column_indexer : array-like, dtype={int, bool}, default=None
+        A 1-dimensional array to select the columns of interest. If `None`,
+        all columns are selected.
+    """
+    row_indexer = slice(None, None, None) if row_indexer is None else row_indexer
+    column_indexer = (
+        slice(None, None, None) if column_indexer is None else column_indexer
+    )
+
+    if hasattr(X, "iloc"):  # pandas dataframe
+        X.iloc[row_indexer, column_indexer] = values
+    else:  # numpy array or sparse matrix
+        X[row_indexer, column_indexer] = values
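A small sketch of the new helper on both container types, using a boolean row mask and an integer column (values and names are illustrative, not from the diff):

import numpy as np
import pandas as pd
from sklearn.utils import _safe_assign

rows = np.array([True, False, True])
X_np = np.zeros((3, 2))
X_df = pd.DataFrame(np.zeros((3, 2)))

_safe_assign(X_np, 5.0, row_indexer=rows, column_indexer=1)
_safe_assign(X_df, 5.0, row_indexer=rows, column_indexer=1)
# both now hold 5.0 at rows 0 and 2 of column 1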


def _get_column_indices(X, key):
"""Get feature column indices for input data X and key.

Expand Down