From 6b101685b5982ed3fdf5c2dcddb7e850a5d7f90d Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 28 Feb 2023 18:06:26 -0500 Subject: [PATCH 1/2] CLN Use grid_values instead of pdp_values in partial_dependence --- doc/whats_new/v1.3.rst | 2 +- sklearn/inspection/_partial_dependence.py | 16 +++++------ .../inspection/_plot/partial_dependence.py | 6 ++-- .../tests/test_plot_partial_dependence.py | 8 +++--- .../tests/test_partial_dependence.py | 28 +++++++++---------- sklearn/utils/tests/test_bunch.py | 8 +++--- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index f89a711a447cf..bc0c415d9a2ee 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -172,7 +172,7 @@ Changelog ......................... - |API| :func:`inspection.partial_dependence` returns a :class:`utils.Bunch` with - new key: `pdp_values`. The `values` key is deprecated in favor of `pdp_values` + new key: `grid_values`. The `values` key is deprecated in favor of `grid_values` and the `values` key will be removed in 1.5. :pr:`21809` by `Thomas Fan`_. diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index 2ff8b78adc899..fd87a6bac16d1 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -369,14 +369,14 @@ def partial_dependence( .. deprecated:: 1.3 The key `values` has been deprecated in 1.3 and will be removed - in 1.5 in favor of `pdp_values`. See `pdp_values` for details + in 1.5 in favor of `grid_values`. See `grid_values` for details about the `values` attribute. - pdp_values : seq of 1d ndarrays + grid_values : seq of 1d ndarrays The values with which the grid has been created. The generated - grid is a cartesian product of the arrays in ``pdp_values``. - ``len(pdp_values) == len(features)``. 
The size of each array - ``pdp_values[j]`` is either ``grid_resolution``, or the number of + grid is a cartesian product of the arrays in ``grid_values`` where + ``len(grid_values) == len(features)``. The size of each array + ``grid_values[j]`` is either ``grid_resolution``, or the number of unique values in ``X[:, j]``, whichever is smaller. .. versionadded:: 1.3 @@ -384,7 +384,7 @@ def partial_dependence( ``n_outputs`` corresponds to the number of classes in a multi-class setting, or to the number of tasks for multi-output regression. For classical regression and binary classification ``n_outputs==1``. - ``n_values_feature_j`` corresponds to the size ``pdp_values[j]``. + ``n_values_feature_j`` corresponds to the size ``grid_values[j]``. See Also -------- @@ -561,10 +561,10 @@ def partial_dependence( msg = ( "Key: 'values', is deprecated in 1.3 and will be removed in 1.5. " - "Please use 'pdp_values' instead." + "Please use 'grid_values' instead." ) pdp_results._set_deprecated( - values, new_key="pdp_values", deprecated_key="values", warning_message=msg + values, new_key="grid_values", deprecated_key="values", warning_message=msg ) if kind == "average": diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index afedb40ff53ff..eb6fd78f628ee 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -1256,7 +1256,7 @@ def plot( else: pd_results_ = [] for kind_plot, pd_result in zip(kind, self.pd_results): - current_results = {"pdp_values": pd_result["pdp_values"]} + current_results = {"grid_values": pd_result["grid_values"]} if kind_plot in ("individual", "both"): preds = pd_result.individual @@ -1274,7 +1274,7 @@ def plot( # get global min and max average predictions of PD grouped by plot type pdp_lim = {} for kind_plot, pdp in zip(kind, pd_results_): - values = pdp["pdp_values"] + values = pdp["grid_values"] preds = pdp.average if kind_plot == "average" 
else pdp.individual min_pd = preds[self.target_idx].min() max_pd = preds[self.target_idx].max() @@ -1402,7 +1402,7 @@ def plot( ): avg_preds = None preds = None - feature_values = pd_result["pdp_values"] + feature_values = pd_result["grid_values"] if kind_plot == "individual": preds = pd_result.individual elif kind_plot == "average": diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py index ff20572403e63..52389519d6c00 100644 --- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py @@ -103,7 +103,7 @@ def test_plot_partial_dependence(grid_resolution, pyplot, clf_diabetes, diabetes target_idx = disp.target_idx line_data = line.get_data() - assert_allclose(line_data[0], avg_preds["pdp_values"][0]) + assert_allclose(line_data[0], avg_preds["grid_values"][0]) assert_allclose(line_data[1], avg_preds.average[target_idx].ravel()) # two feature position @@ -243,7 +243,7 @@ def test_plot_partial_dependence_str_features( assert line.get_alpha() == 0.8 line_data = line.get_data() - assert_allclose(line_data[0], avg_preds["pdp_values"][0]) + assert_allclose(line_data[0], avg_preds["grid_values"][0]) assert_allclose(line_data[1], avg_preds.average[target_idx].ravel()) # contour @@ -279,7 +279,7 @@ def test_plot_partial_dependence_custom_axes(pyplot, clf_diabetes, diabetes): target_idx = disp.target_idx line_data = line.get_data() - assert_allclose(line_data[0], avg_preds["pdp_values"][0]) + assert_allclose(line_data[0], avg_preds["grid_values"][0]) assert_allclose(line_data[1], avg_preds.average[target_idx].ravel()) # contour @@ -466,7 +466,7 @@ def test_plot_partial_dependence_multiclass(pyplot): disp_target_0.pd_results, disp_symbol.pd_results ): assert_allclose(int_result.average, symbol_result.average) - assert_allclose(int_result["pdp_values"], symbol_result["pdp_values"]) + 
assert_allclose(int_result["grid_values"], symbol_result["grid_values"]) # check that the pd plots are different for another target disp_target_1 = PartialDependenceDisplay.from_estimator( diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 812c059ca5f92..41c07a9385b35 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -109,7 +109,7 @@ def test_output_shape(Estimator, method, data, grid_resolution, features, kind): kind=kind, grid_resolution=grid_resolution, ) - pdp, axes = result, result["pdp_values"] + pdp, axes = result, result["grid_values"] expected_pdp_shape = (n_targets, *[grid_resolution for _ in range(len(features))]) expected_ice_shape = ( @@ -435,7 +435,7 @@ def test_partial_dependence_easy_target(est, power): est, features=[target_variable], X=X, grid_resolution=1000, kind="average" ) - new_X = pdp["pdp_values"][0].reshape(-1, 1) + new_X = pdp["grid_values"][0].reshape(-1, 1) new_y = pdp["average"][0] # add polynomial features if needed new_X = PolynomialFeatures(degree=power).fit_transform(new_X) @@ -655,7 +655,7 @@ def test_partial_dependence_sample_weight(): pdp = partial_dependence(clf, X, features=[1], kind="average") - assert np.corrcoef(pdp["average"], pdp["pdp_values"])[0, 1] > 0.99 + assert np.corrcoef(pdp["average"], pdp["grid_values"])[0, 1] > 0.99 def test_hist_gbdt_sw_not_supported(): @@ -693,8 +693,8 @@ def test_partial_dependence_pipeline(): ) assert_allclose(pdp_pipe["average"], pdp_clf["average"]) assert_allclose( - pdp_pipe["pdp_values"][0], - pdp_clf["pdp_values"][0] * scaler.scale_[features] + scaler.mean_[features], + pdp_pipe["grid_values"][0], + pdp_clf["grid_values"][0] * scaler.scale_[features] + scaler.mean_[features], ) @@ -762,11 +762,11 @@ def test_partial_dependence_dataframe(estimator, preprocessor, features): if preprocessor is not None: scaler = 
preprocessor.named_transformers_["standardscaler"] assert_allclose( - pdp_pipe["pdp_values"][1], - pdp_clf["pdp_values"][1] * scaler.scale_[1] + scaler.mean_[1], + pdp_pipe["grid_values"][1], + pdp_clf["grid_values"][1] * scaler.scale_[1] + scaler.mean_[1], ) else: - assert_allclose(pdp_pipe["pdp_values"][1], pdp_clf["pdp_values"][1]) + assert_allclose(pdp_pipe["grid_values"][1], pdp_clf["grid_values"][1]) @pytest.mark.parametrize( @@ -797,7 +797,7 @@ def test_partial_dependence_feature_type(features, expected_pd_shape): pipe, df, features=features, grid_resolution=10, kind="average" ) assert pdp_pipe["average"].shape == expected_pd_shape - assert len(pdp_pipe["pdp_values"]) == len(pdp_pipe["average"].shape) - 1 + assert len(pdp_pipe["grid_values"]) == len(pdp_pipe["average"].shape) - 1 @pytest.mark.parametrize( @@ -851,17 +851,17 @@ def test_partial_dependence_bunch_values_deprecated(): msg = ( "Key: 'values', is deprecated in 1.3 and will be " - "removed in 1.5. Please use 'pdp_values' instead" + "removed in 1.5. Please use 'grid_values' instead" ) with warnings.catch_warnings(): - # Does not raise warnings with "pdp_values" + # Does not raise warnings with "grid_values" warnings.simplefilter("error", FutureWarning) - pdp_values = pdp_avg["pdp_values"] + grid_values = pdp_avg["grid_values"] with pytest.warns(FutureWarning, match=msg): # Warns for "values" values = pdp_avg["values"] - # "values" and "pdp_values" are the same object - assert values is pdp_values + # "values" and "grid_values" are the same object + assert values is grid_values diff --git a/sklearn/utils/tests/test_bunch.py b/sklearn/utils/tests/test_bunch.py index 922e724663a34..15463475747f4 100644 --- a/sklearn/utils/tests/test_bunch.py +++ b/sklearn/utils/tests/test_bunch.py @@ -12,16 +12,16 @@ def test_bunch_attribute_deprecation(): values = np.asarray([1, 2, 3]) msg = ( "Key: 'values', is deprecated in 1.3 and will be " - "removed in 1.5. Please use 'pdp_values' instead" + "removed in 1.5. 
Please use 'grid_values' instead" ) bunch._set_deprecated( - values, new_key="pdp_values", deprecated_key="values", warning_message=msg + values, new_key="grid_values", deprecated_key="values", warning_message=msg ) with warnings.catch_warnings(): - # Does not warn for "pdp_values" + # Does not warn for "grid_values" warnings.simplefilter("error") - v = bunch["pdp_values"] + v = bunch["grid_values"] assert v is values From 3f23f2445153cece8d796f6dc1ce1a3c02c8efe8 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 28 Feb 2023 18:11:01 -0500 Subject: [PATCH 2/2] DOC Adds PR number --- doc/whats_new/v1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index bc0c415d9a2ee..b297155b33a94 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -174,7 +174,7 @@ Changelog - |API| :func:`inspection.partial_dependence` returns a :class:`utils.Bunch` with new key: `grid_values`. The `values` key is deprecated in favor of `grid_values` and the `values` key will be removed in 1.5. - :pr:`21809` by `Thomas Fan`_. + :pr:`21809` and :pr:`25732` by `Thomas Fan`_. :mod:`sklearn.linear_model` ...........................