8000 API Deprecates values in partial_dependence in favor of pdp_values by thomasjpfan · Pull Request #21809 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

API Deprecates values in partial_dependence in favor of pdp_values #21809

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ Changelog
- |Enhancement| Added the parameter `fill_value` to :class:`impute.IterativeImputer`.
:pr:`25232` by :user:`Thijs van Weezel <ValueInvestorThijs>`.

:mod:`sklearn.inspection`
.........................

- |API| :func:`inspection.partial_dependence` returns a :class:`utils.Bunch` with
new key: `pdp_values`. The `values` key is deprecated in favor of `pdp_values`
and will be removed in 1.5.
:pr:`21809` by `Thomas Fan`_.

:mod:`sklearn.linear_model`
...........................

Expand Down
40 changes: 29 additions & 11 deletions sklearn/inspection/_partial_dependence.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,16 +365,26 @@ def partial_dependence(
Only available when ``kind='both'``.

values : seq of 1d ndarrays
The values with which the grid has been created.

.. deprecated:: 1.3
The key `values` has been deprecated in 1.3 and will be removed
in 1.5 in favor of `pdp_values`. See `pdp_values` for details
about the `values` attribute.

pdp_values : seq of 1d ndarrays
The values with which the grid has been created. The generated
grid is a cartesian product of the arrays in ``values``.
``len(values) == len(features)``. The size of each array
``values[j]`` is either ``grid_resolution``, or the number of
grid is a cartesian product of the arrays in ``pdp_values``.
``len(pdp_values) == len(features)``. The size of each array
``pdp_values[j]`` is either ``grid_resolution``, or the number of
unique values in ``X[:, j]``, whichever is smaller.

.. versionadded:: 1.3

``n_outputs`` corresponds to the number of classes in a multi-class
setting, or to the number of tasks for multi-output regression.
For classical regression and binary classification ``n_outputs==1``.
``n_values_feature_j`` corresponds to the size ``values[j]``.
``n_values_feature_j`` corresponds to the size ``pdp_values[j]``.

See Also
--------
Expand Down Expand Up @@ -547,14 +557,22 @@ def partial_dependence(
averaged_predictions = averaged_predictions.reshape(
-1, *[val.shape[0] for val in values]
)
pdp_results = Bunch()

msg = (
"Key: 'values', is deprecated in 1.3 and will be removed in 1.5. "
"Please use 'pdp_values' instead."
)
pdp_results._set_deprecated(
values, new_key="pdp_values", deprecated_key="values", warning_message=msg
)

if kind == "average":
return Bunch(average=averaged_predictions, values=values)
pdp_results["average"] = averaged_predictions
elif kind == "individual":
return Bunch(individual=predictions, values=values)
pdp_results["individual"] = predictions
else: # kind='both'
return Bunch(
average=averaged_predictions,
individual=predictions,
values=values,
)
pdp_results["average"] = averaged_predictions
pdp_results["individual"] = predictions

return pdp_results
6 changes: 3 additions & 3 deletions sklearn/inspection/_plot/partial_dependence.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,7 @@ def plot(
else:
pd_results_ = []
for kind_plot, pd_result in zip(kind, self.pd_results):
current_results = {"values": pd_result["values"]}
current_results = {"pdp_values": pd_result["pdp_values"]}

if kind_plot in ("individual", "both"):
preds = pd_result.individual
Expand All @@ -1274,7 +1274,7 @@ def plot(
# get global min and max average predictions of PD grouped by plot type
pdp_lim = {}
for kind_plot, pdp in zip(kind, pd_results_):
values = pdp["values"]
values = pdp["pdp_values"]
preds = pdp.average if kind_plot == "average" else pdp.individual
min_pd = preds[self.target_idx].min()
max_pd = preds[self.target_idx].max()
Expand Down Expand Up @@ -1402,7 +1402,7 @@ def plot(
):
avg_preds = None
preds = None
feature_values = pd_result["values"]
feature_values = pd_result["pdp_values"]
if kind_plot == "individual":
preds = pd_result.individual
elif kind_plot == "average":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_plot_partial_dependence(grid_resolution, pyplot, clf_diabetes, diabetes
target_idx = disp.target_idx

line_data = line.get_data()
assert_allclose(line_data[0], avg_preds["values"][0])
assert_allclose(line_data[0], avg_preds["pdp_values"][0])
assert_allclose(line_data[1], avg_preds.average[target_idx].ravel())

# two feature position
Expand Down Expand Up @@ -243,7 +243,7 @@ def test_plot_partial_dependence_str_features(
assert line.get_alpha() == 0.8

line_data = line.get_data()
assert_allclose(line_data[0], avg_preds["values"][0])
assert_allclose(line_data[0], avg_preds["pdp_values"][0])
assert_allclose(line_data[1], avg_preds.average[target_idx].ravel())

# contour
Expand Down Expand Up @@ -279,7 +279,7 @@ def test_plot_partial_dependence_custom_axes(pyplot, clf_diabetes, diabetes):
target_idx = disp.target_idx

line_data = line.get_data()
assert_allclose(line_data[0], avg_preds["values"][0])
assert_allclose(line_data[0], avg_preds["pdp_values"][0])
assert_allclose(line_data[1], avg_preds.average[target_idx].ravel())

# contour
Expand Down Expand Up @@ -466,7 +466,7 @@ def test_plot_partial_dependence_multiclass(pyplot):
disp_target_0.pd_results, disp_symbol.pd_results
):
assert_allclose(int_result.average, symbol_result.average)
assert_allclose(int_result["values"], symbol_result["values"])
assert_allclose(int_result["pdp_values"], symbol_result["pdp_values"])

# check that the pd plots are different for another target
disp_target_1 = PartialDependenceDisplay.from_estimator(
Expand Down
47 changes: 38 additions & 9 deletions sklearn/inspection/tests/test_partial_dependence.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Testing for the partial dependence module.
"""
import warnings

import numpy as np
import pytest
Expand Down Expand Up @@ -108,7 +109,7 @@ def test_output_shape(Estimator, method, data, grid_resolution, features, kind):
kind=kind,
grid_resolution=grid_resolution,
)
pdp, axes = result, result["values"]
pdp, axes = result, result["pdp_values"]

expected_pdp_shape = (n_targets, *[grid_resolution for _ in range(len(features))])
expected_ice_shape = (
Expand Down Expand Up @@ -434,7 +435,7 @@ def test_partial_dependence_easy_target(est, power):
est, features=[target_variable], X=X, grid_resolution=1000, kind="average"
)

new_X = pdp["values"][0].reshape(-1, 1)
new_X = pdp["pdp_values"][0].reshape(-1, 1)
new_y = pdp["average"][0]
# add polynomial features if needed
new_X = PolynomialFeatures(degree=power).fit_transform(new_X)
Expand Down Expand Up @@ -654,7 +655,7 @@ def test_partial_dependence_sample_weight():

pdp = partial_dependence(clf, X, features=[1], kind="average")

assert np.corrcoef(pdp["average"], pdp["values"])[0, 1] > 0.99
assert np.corrcoef(pdp["average"], pdp["pdp_values"])[0, 1] > 0.99


def test_hist_gbdt_sw_not_supported():
Expand Down Expand Up @@ -692,8 +693,8 @@ def test_partial_dependence_pipeline():
)
assert_allclose(pdp_pipe["average"], pdp_clf["average"])
assert_allclose(
pdp_pipe["values"][0],
pdp_clf["values"][0] * scaler.scale_[features] + scaler.mean_[features],
pdp_pipe["pdp_values"][0],
pdp_clf["pdp_values"][0] * scaler.scale_[features] + scaler.mean_[features],
)


Expand Down Expand Up @@ -761,11 +762,11 @@ def test_partial_dependence_dataframe(estimator, preprocessor, features):
if preprocessor is not None:
scaler = preprocessor.named_transformers_["standardscaler"]
assert_allclose(
pdp_pipe["values"][1],
pdp_clf["values"][1] * scaler.scale_[1] + scaler.mean_[1],
pdp_pipe["pdp_values"][1],
pdp_clf["pdp_values"][1] * scaler.scale_[1] + scaler.mean_[1],
)
else:
assert_allclose(pdp_pipe["values"][1], pdp_clf["values"][1])
assert_allclose(pdp_pipe["pdp_values"][1], pdp_clf["pdp_values"][1])


@pytest.mark.parametrize(
Expand Down Expand Up @@ -796,7 +797,7 @@ def test_partial_dependence_feature_type(features, expected_pd_shape):
pipe, df, features=features, grid_resolution=10, kind="average"
)
assert pdp_pipe["average"].shape == expected_pd_shape
assert len(pdp_pipe["values"]) == len(pdp_pipe["average"].shape) - 1
assert len(pdp_pipe["pdp_values"]) == len(pdp_pipe["average"].shape) - 1


@pytest.mark.parametrize(
Expand Down Expand Up @@ -836,3 +837,31 @@ def test_kind_average_and_average_of_individual(Estimator, data):
pdp_ind = partial_dependence(est, X=X, features=[1, 2], kind="individual")
avg_ind = np.mean(pdp_ind["individual"], axis=1)
assert_allclose(avg_ind, pdp_avg["average"])


# TODO(1.5): Remove once the deprecated `values` bunch key is dropped in 1.5
def test_partial_dependence_bunch_values_deprecated():
    """Accessing the deprecated `values` key warns; `pdp_values` does not."""

    clf = LogisticRegression()
    (X, y), _ = binary_classification_data
    clf.fit(X, y)

    bunch = partial_dependence(clf, X=X, features=[1, 2], kind="average")

    expected_msg = (
        "Key: 'values', is deprecated in 1.3 and will be "
        "removed in 1.5. Please use 'pdp_values' instead"
    )

    # The new key must not trigger any FutureWarning.
    with warnings.catch_warnings():
        warnings.simplefilter("error", FutureWarning)
        new_key_result = bunch["pdp_values"]

    # The old key warns, but still resolves to the very same object.
    with pytest.warns(FutureWarning, match=expected_msg):
        old_key_result = bunch["values"]

    assert old_key_result is new_key_result
19 changes: 19 additions & 0 deletions sklearn/utils/_bunch.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import warnings


class Bunch(dict):
"""Container object exposing keys as attributes.

Expand All @@ -24,6 +27,22 @@ class Bunch(dict):
def __init__(self, **kwargs):
    super().__init__(kwargs)

    # Map from deprecated key to the warning message emitted when that key
    # is accessed. Stored directly in __dict__ so it does not show up as a
    # Bunch key/attribute itself.
    self.__dict__["_deprecated_key_to_warnings"] = {}

def __getitem__(self, key):
    """Return ``self[key]``, first warning if *key* is deprecated.

    Keys registered through `_set_deprecated` emit their stored
    FutureWarning message before the lookup is performed.
    """
    # `.get` (rather than direct indexing) guards against instances that
    # were pickled before `_deprecated_key_to_warnings` existed in
    # `__dict__` and therefore lack the attribute after unpickling.
    if key in self.__dict__.get("_deprecated_key_to_warnings", {}):
        warnings.warn(
            self._deprecated_key_to_warnings[key],
            FutureWarning,
        )
    return super().__getitem__(key)

def _set_deprecated(self, value, *, new_key, deprecated_key, warning_message):
    """Store *value* under both keys, registering *deprecated_key*.

    Looking up *deprecated_key* afterwards emits *warning_message* as a
    FutureWarning; looking up *new_key* stays silent.
    """
    self.__dict__["_deprecated_key_to_warnings"][deprecated_key] = warning_message
    self[new_key] = value
    self[deprecated_key] = value

def __setattr__(self, key, value):
    # Attribute assignment writes through to the underlying dict:
    # ``bunch.x = 1`` is equivalent to ``bunch["x"] = 1``.
    self[key] = value

Expand Down
32 changes: 32 additions & 0 deletions sklearn/utils/tests/test_bunch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import warnings

import numpy as np
import pytest

from sklearn.utils import Bunch


def test_bunch_attribute_deprecation():
    """`__getitem__` on a deprecated Bunch key raises a FutureWarning."""
    bunch = Bunch()
    arr = np.asarray([1, 2, 3])
    warning_msg = (
        "Key: 'values', is deprecated in 1.3 and will be "
        "removed in 1.5. Please use 'pdp_values' instead"
    )
    bunch._set_deprecated(
        arr, new_key="pdp_values", deprecated_key="values", warning_message=warning_msg
    )

    # Fetching through the new key must be silent.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        fetched = bunch["pdp_values"]

    assert fetched is arr

    # Fetching through the deprecated key warns but returns the same object.
    with pytest.warns(FutureWarning, match=warning_msg):
        fetched = bunch["values"]

    assert fetched is arr
0