8000 FIX array api support for `clip` param of `MinMaxScaler` by StefanieSenger · Pull Request #29615 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

FIX array api support for clip param of MinMaxScaler #29615

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def transform(self, X):
X *= self.scale_
X += self.min_
if self.clip:
xp.clip(X, self.feature_range[0], self.feature_range[1], out=X)
X = xp.clip(X, self.feature_range[0], self.feature_range[1])
return X

def inverse_transform(self, X):
Expand Down
22 changes: 14 additions & 8 deletions sklearn/preprocessing/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pytest
from scipy import sparse, stats

from sklearn import datasets
from sklearn import config_context, datasets
from sklearn.base import clone
from sklearn.exceptions import NotFittedError
from sklearn.metrics.pairwise import linear_kernel
Expand Down Expand Up @@ -41,6 +41,7 @@
yield_namespace_device_dtype_combinations,
)
from sklearn.utils._testing import (
_array_api_for_tests,
_convert_container,
assert_allclose,
assert_allclose_dense_sparse,
Expand Down Expand Up @@ -2475,14 +2476,19 @@ def test_standard_scaler_sparse_partial_fit_finite_variance(X_2):
assert np.isfinite(scaler.var_[0])


@pytest.mark.parametrize("feature_range", [(0, 1), (-10, 10)])
def test_minmax_scaler_clip(feature_range):
@pytest.mark.parametrize(
"array_namespace, device, _", yield_namespace_device_dtype_combinations()
)
@pytest.mark.parametrize("feature_range", [(0.0, 1.1), (-10.0, 10.0)])
def test_minmax_scaler_clip(feature_range, array_namespace, device, _):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's put array_api in the test name so that it can be picked up by the CUDA CI that only runs tests with "array_api" in their name to avoid spending GPU time running non-array API tests.

Suggested change
def test_minmax_scaler_clip(feature_range, array_namespace, device, _):
def test_minmax_scaler_clip_array_api(feature_range, array_namespace, device, _):

# test behaviour of the parameter 'clip' in MinMaxScaler
X = iris.data
scaler = MinMaxScaler(feature_range=feature_range, clip=True).fit(X)
X_min, X_max = np.min(X, axis=0), np.max(X, axis=0)
X_test = [np.r_[X_min[:2] - 10, X_max[2:] + 10]]
X_transformed = scaler.transform(X_test)
xp = _array_api_for_tests(array_namespace, device)
X = xp.asarray(iris.data)
with config_context(array_api_dispatch=True):
scaler = MinMaxScaler(feature_range=feature_range, clip=True).fit(X)
X_min, X_max = xp.min(X, axis=0), xp.max(X, axis=0)
X_test = xp.asarray([np.r_[X_min[:2] - 10, X_max[2:] + 10]])
X_transformed = scaler.transform(X_test)
assert_allclose(
X_transformed,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert_allclose in not array API compliant. We should therefore always explicitly convert the arrays to compare to a numpy counterpart.

Suggested change
X_transformed,
_convert_to_numpy(X_transformed),

_convert_to_numpy is imported from sklearn.utils._array_api.

[[feature_range[0], feature_range[0], feature_range[1], feature_range[1]]],
Expand Down
4 changes: 2 additions & 2 deletions sklearn/utils/tests/test_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,8 @@ def __init__(self, device_name):
assert array1.device == device(array1, array1, array2)


# TODO: add cupy and cupy.array_api to the list of libraries once the
# the following upstream issue has been fixed:
# TODO: add cupy and cupy.array_api to the list of libraries now that the
# the following upstream issue had been fixed:
# https://github.com/cupy/cupy/issues/8180
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["numpy", "array_api_strict", "torch"])
Expand Down
Loading
0