Drop support for the redundant and deprecated `cupy.array_api` in favor of `array_api_compat`. by ogrisel · Pull Request #29639 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

Drop support for the redundant and deprecated cupy.array_api in favor of array_api_compat. #29639

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/cuda-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ jobs:
run: |
source "${HOME}/conda/etc/profile.d/conda.sh"
conda activate sklearn
python -c "import sklearn; sklearn.show_versions()"
SCIPY_ARRAY_API=1 pytest -k 'array_api'
3 changes: 3 additions & 0 deletions build_tools/github/create_gpu_environment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ conda activate base
CONDA_ENV_NAME=sklearn
LOCK_FILE=build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock
create_conda_environment_from_lock_file $CONDA_ENV_NAME $LOCK_FILE

conda activate $CONDA_ENV_NAME
conda list
4 changes: 2 additions & 2 deletions doc/modules/array_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ At this stage, this support is **considered experimental** and must be enabled
explicitly as explained in the following.

.. note::
Currently, only `cupy.array_api`, `array-api-strict`, `cupy`, and `PyTorch`
are known to work with scikit-learn's estimators.
Currently, only `array-api-strict`, `cupy`, and `PyTorch` are known to work
with scikit-learn's estimators.

Example usage
=============
Expand Down
7 changes: 7 additions & 0 deletions doc/whats_new/v1.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ See :ref:`array_api` for more details.
compatible inputs when their base estimators do. :pr:`27096` by :user:`Tim
Head <betatim>` and :user:`Olivier Grisel <ogrisel>`.

**Other**

- Support for the soon to be deprecated `cupy.array_api` module has been
removed in favor of directly supporting the top level `cupy` module, possibly
via the `array_api_compat.cupy` compatibility wrapper. :pr:`29639` by
:user:`Olivier Grisel <ogrisel>`.

Metadata Routing
----------------

Expand Down
7 changes: 5 additions & 2 deletions sklearn/metrics/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@
gen_even_slices,
)
from ..utils._array_api import (
_clip,
_fill_or_add_to_diagonal,
_find_matching_floating_dtype,
_is_numpy_namespace,
_max_precision_float_dtype,
_modify_in_place_if_numpy,
device,
get_namespace,
get_namespace_and_device,
)
Expand Down Expand Up @@ -1166,7 +1166,10 @@ def cosine_distances(X, Y=None):
S = cosine_similarity(X, Y)
S *= -1
S += 1
S = _clip(S, 0, 2, xp)
# TODO: remove the xp.asarray calls once the following is fixed:
# https://github.com/data-apis/array-api-compat/issues/177
device_ = device(S)
S = xp.clip(S, xp.asarray(0.0, device=device_), xp.asarray(2.0, device=device_))
if X is Y or Y is None:
# Ensure that distances between vectors and themselves are set to 0.0.
# This may not be the case due to floating point rounding errors.
Expand Down
59 changes: 1 addition & 58 deletions sklearn/utils/_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def yield_namespaces(include_numpy_namespaces=True):
# array_api_strict.Array instances always have a dummy "device" attribute.
"array_api_strict",
"cupy",
"cupy.array_api",
"torch",
]:
if not include_numpy_namespaces and array_namespace in _NUMPY_NAMESPACE_NAMES:
Expand Down Expand Up @@ -242,7 +241,7 @@ def _isdtype_single(dtype, kind, *, xp):
elif kind == "real floating":
return dtype in supported_float_dtypes(xp)
elif kind == "complex floating":
# Some name spaces do not have complex, such as cupy.array_api
# Some name spaces might not have support for complex dtypes.
complex_dtypes = set()
if hasattr(xp, "complex64"):
complex_dtypes.add(xp.complex64)
Expand Down Expand Up @@ -304,42 +303,6 @@ def ensure_common_namespace_device(reference, *arrays):
return arrays


class _ArrayAPIWrapper:
"""sklearn specific Array API compatibility wrapper

This wrapper makes it possible for scikit-learn maintainers to
deal with discrepancies between different implementations of the
Python Array API standard and its evolution over time.

The Python Array API standard specification:
https://data-apis.org/array-api/latest/

Documentation of the NumPy implementation:
https://numpy.org/neps/nep-0047-array-api-standard.html
"""

def __init__(self, array_namespace):
self._namespace = array_namespace

def __getattr__(self, name):
return getattr(self._namespace, name)

def __eq__(self, other):
return self._namespace == other._namespace

def isdtype(self, dtype, kind):
return isdtype(dtype, kind, xp=self._namespace)

def maximum(self, x1, x2):
# TODO: Remove when `maximum` is made compatible in `array_api_compat`,
# based on the `2023.12` specification.
# https://github.com/data-apis/array-api-compat/issues/127
x1_np = _convert_to_numpy(x1, xp=self._namespace)
x2_np = _convert_to_numpy(x2, xp=self._namespace)
x_max = numpy.maximum(x1_np, x2_np)
return self._namespace.asarray(x_max, device=device(x1, x2))


def _check_device_cpu(device): # noqa
if device not in {"cpu", None}:
raise ValueError(f"Unsupported device for NumPy: {device!r}")
Expand Down Expand Up @@ -597,11 +560,6 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):

namespace, is_array_api_compliant = array_api_compat.get_namespace(*arrays), True

# These namespaces need additional wrapping to smooth out small differences
# between implementations
if namespace.__name__ in {"cupy.array_api"}:
namespace = _ArrayAPIWrapper(namespace)

if namespace.__name__ == "array_api_strict" and hasattr(
namespace, "set_array_api_strict_flags"
):
Expand Down Expand Up @@ -827,19 +785,6 @@ def _nanmax(X, axis=None, xp=None):
return X


def _clip(S, min_val, max_val, xp):
    """Limit the values of `S` to the range `[min_val, max_val]`.

    NumPy namespaces use `numpy.clip` directly. For any other namespace the
    bounds are turned into arrays of `S.dtype` and applied with two successive
    `xp.where` calls (lower bound first, then upper bound).
    """
    # TODO: remove this method and change all usage once we move to array api 2023.12
    # https://data-apis.org/array-api/2023.12/API_specification/generated/array_api.clip.html#clip
    if not _is_numpy_namespace(xp):
        lower = xp.asarray(min_val, dtype=S.dtype)
        upper = xp.asarray(max_val, dtype=S.dtype)
        raised = xp.where(S < lower, lower, S)
        return xp.where(raised > upper, upper, raised)
    return numpy.clip(S, min_val, max_val)


def _asarray_with_order(
array, dtype=None, order=None, copy=None, *, xp=None, device=None
):
Expand Down Expand Up @@ -890,8 +835,6 @@ def _convert_to_numpy(array, xp):

if xp_name in {"array_api_compat.torch", "torch"}:
return array.cpu().numpy()
elif xp_name == "cupy.array_api":
return array._array.get()
elif xp_name in {"array_api_compat.cupy", "cupy"}: # pragma: nocover
return array.get()

Expand Down
2 changes: 1 addition & 1 deletion sklearn/utils/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,7 +1024,7 @@ def _array_api_for_tests(array_namespace, device):
"MPS is not available because the current PyTorch install was not "
"built with MPS enabled."
)
elif array_namespace in {"cupy", "cupy.array_api"}: # pragma: nocover
elif array_namespace == "cupy": # pragma: nocover
import cupy

if cupy.cuda.runtime.getDeviceCount() == 0:
Expand Down
85 changes: 10 additions & 75 deletions sklearn/utils/tests/test_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from sklearn._config import config_context
from sklearn.base import BaseEstimator
from sklearn.utils._array_api import (
_ArrayAPIWrapper,
_asarray_with_order,
_atol_for_type,
_average,
Expand Down Expand Up @@ -104,48 +103,6 @@ def mock_getenv(key):
xp_out, is_array_api_compliant = get_namespace(X_xp)


class _AdjustableNameAPITestWrapper(_ArrayAPIWrapper):
    """Test helper: an `_ArrayAPIWrapper` whose `__name__` is set by the caller."""

    def __init__(self, array_namespace, name):
        # The parent stores the wrapped namespace; the name is then set on the
        # instance itself.
        super().__init__(array_namespace)
        self.__name__ = name


def test_array_api_wrapper_astype():
    """Check dtype conversion through _ArrayAPIWrapper for a non-NumPy namespace."""
    strict_xp = pytest.importorskip("array_api_strict")
    named_xp = _AdjustableNameAPITestWrapper(strict_xp, "array_api_strict")
    xp = _ArrayAPIWrapper(named_xp)

    X = xp.asarray([[1, 2, 3], [3, 4, 5]], dtype=xp.float64)

    # Conversion through the explicit `astype` function.
    via_astype = xp.astype(X, xp.float32)
    assert via_astype.dtype == xp.float32

    # Conversion through `asarray` with a target dtype.
    via_asarray = xp.asarray(X, dtype=xp.float32)
    assert via_asarray.dtype == xp.float32


def test_array_api_wrapper_maximum():
    """Check `_ArrayAPIWrapper.maximum` for a namespace other than NumPy.

    The method mainly exists for `cupy.array_api`, but that library is not
    available on our coverage-enabled PR CI, so `array-api-strict` is used
    as a stand-in.
    """
    strict_xp = pytest.importorskip("array_api_strict")
    named_xp = _AdjustableNameAPITestWrapper(strict_xp, "array_api_strict")
    xp = _ArrayAPIWrapper(named_xp)

    left = xp.asarray([[1, 2, 3], [3, 9, 5]], dtype=xp.int64)
    right = xp.asarray([[0, 1, 6], [8, 4, 5]], dtype=xp.int64)
    expected = xp.asarray([[1, 2, 6], [8, 9, 5]], dtype=xp.int64)

    elementwise_max = xp.maximum(left, right)
    # The dtype of the inputs must be preserved and every entry must match.
    assert elementwise_max.dtype == left.dtype
    assert xp.all(xp.equal(elementwise_max, expected))


@pytest.mark.parametrize("array_api", ["numpy", "array_api_strict"])
def test_asarray_with_order(array_api):
"""Test _asarray_with_order passes along order for NumPy arrays."""
Expand All @@ -158,21 +115,6 @@ def test_asarray_with_order(array_api):
assert X_new_np.flags["F_CONTIGUOUS"]


def test_asarray_with_order_ignored():
    """Check that _asarray_with_order ignores `order` for a generic Array API."""
    strict_xp = pytest.importorskip("array_api_strict")
    xp_ = _AdjustableNameAPITestWrapper(strict_xp, "array_api_strict")

    X_np = numpy.asarray([[1.2, 3.4, 5.1], [3.4, 5.5, 1.2]], order="C")
    X_xp = xp_.asarray(X_np)

    converted = _asarray_with_order(X_xp, order="F", xp=xp_)

    # The requested Fortran order must not have been applied.
    flags = numpy.asarray(converted).flags
    assert flags["C_CONTIGUOUS"]
    assert not flags["F_CONTIGUOUS"]


@pytest.mark.parametrize(
"array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations()
)
Expand Down Expand Up @@ -351,8 +293,8 @@ def __init__(self, device_name):
assert array1.device == device(array1, array1, array2)


# TODO: add cupy and cupy.array_api to the list of libraries once the
# following upstream issue has been fixed:
# TODO: add cupy to the list of libraries once the following upstream issue
# has been fixed:
# https://github.com/cupy/cupy/issues/8180
Copy link
Member Author
@ogrisel ogrisel Aug 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to reviewers: cupy/cupy#8180 was fixed and will be included in the next release (v14) but we still have to wait for the time being.

@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["numpy", "array_api_strict", "torch"])
Expand Down Expand Up @@ -419,7 +361,7 @@ def test_ravel(namespace, _device, _dtype):


@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["cupy", "torch", "cupy.array_api"])
@pytest.mark.parametrize("library", ["cupy", "torch"])
def test_convert_to_numpy_gpu(library): # pragma: nocover
"""Check convert_to_numpy for GPU backed libraries."""
xp = pytest.importorskip(library)
Expand Down Expand Up @@ -459,7 +401,7 @@ def fit(self, X, y=None):
[
("torch", lambda array: array.cpu().numpy()),
("array_api_strict", lambda array: numpy.asarray(array)),
("cupy.array_api", lambda array: array._array.get()),
("cupy", lambda array: array.get()),
],
)
def test_convert_estimator_to_ndarray(array_namespace, converter):
Expand Down Expand Up @@ -500,15 +442,9 @@ def test_reshape_behavior():
xp.reshape(X, -1)


@pytest.mark.parametrize("wrapper", [_ArrayAPIWrapper, _NumPyAPIWrapper])
def test_get_namespace_array_api_isdtype(wrapper):
"""Test isdtype implementation from _ArrayAPIWrapper and _NumPyAPIWrapper."""

if wrapper == _ArrayAPIWrapper:
xp_ = pytest.importorskip("array_api_strict")
xp = _ArrayAPIWrapper(xp_)
else:
xp = _NumPyAPIWrapper()
def test_get_namespace_array_api_isdtype():
"""Test isdtype implementation from _NumPyAPIWrapper."""
xp = _NumPyAPIWrapper()

assert xp.isdtype(xp.float32, xp.float32)
assert xp.isdtype(xp.float32, "real floating")
Expand All @@ -533,10 +469,9 @@ def test_get_namespace_array_api_isdtype(wrapper):

assert not xp.isdtype(xp.float32, "complex floating")

if wrapper == _NumPyAPIWrapper:
assert not xp.isdtype(xp.int8, "complex floating")
assert xp.isdtype(xp.complex64, "complex floating")
assert xp.isdtype(xp.complex128, "complex floating")
assert not xp.isdtype(xp.int8, "complex floating")
assert xp.isdtype(xp.complex64, "complex floating")
assert xp.isdtype(xp.complex128, "complex floating")

with pytest.raises(ValueError, match="Unrecognized data type"):
assert xp.isdtype(xp.int16, "unknown")
Expand Down
5 changes: 2 additions & 3 deletions sklearn/utils/tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2011,10 +2011,9 @@ def test_pandas_array_returns_ndarray(input_values):


@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("array_namespace", ["array_api_strict", "cupy.array_api"])
def test_check_array_array_api_has_non_finite(array_namespace):
def test_check_array_array_api_has_non_finite():
"""Checks that Array API arrays checks non-finite correctly."""
xp = pytest.importorskip(array_namespace)
xp = pytest.importorskip("array_api_strict")

X_nan = xp.asarray([[xp.nan, 1, 0], [0, xp.nan, 3]], dtype=xp.float32)
with config_context(array_api_dispatch=True):
Expand Down
Loading
0