diff --git a/.github/workflows/cuda-ci.yml b/.github/workflows/cuda-ci.yml index 6f46bae60fe91..2856c9ec05c52 100644 --- a/.github/workflows/cuda-ci.yml +++ b/.github/workflows/cuda-ci.yml @@ -42,4 +42,5 @@ jobs: run: | source "${HOME}/conda/etc/profile.d/conda.sh" conda activate sklearn + python -c "import sklearn; sklearn.show_versions()" SCIPY_ARRAY_API=1 pytest -k 'array_api' diff --git a/build_tools/github/create_gpu_environment.sh b/build_tools/github/create_gpu_environment.sh index 87b12848d9303..96a62d7678566 100755 --- a/build_tools/github/create_gpu_environment.sh +++ b/build_tools/github/create_gpu_environment.sh @@ -15,3 +15,6 @@ conda activate base CONDA_ENV_NAME=sklearn LOCK_FILE=build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock create_conda_environment_from_lock_file $CONDA_ENV_NAME $LOCK_FILE + +conda activate $CONDA_ENV_NAME +conda list diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index 34ca3c3393bd7..d7e5247211b58 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -21,8 +21,8 @@ At this stage, this support is **considered experimental** and must be enabled explicitly as explained in the following. .. note:: - Currently, only `cupy.array_api`, `array-api-strict`, `cupy`, and `PyTorch` - are known to work with scikit-learn's estimators. + Currently, only `array-api-strict`, `cupy`, and `PyTorch` are known to work + with scikit-learn's estimators. Example usage ============= diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 74357c9171f10..b0e91f70ae848 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -62,6 +62,13 @@ See :ref:`array_api` for more details. compatible inputs when their base estimators do. :pr:`27096` by :user:`Tim Head <betatim>` and :user:`Olivier Grisel <ogrisel>`. 
+**Other** + +- Support for the soon to be deprecated `cupy.array_api` module has been + removed in favor of directly supporting the top level `cupy` module, possibly + via the `array_api_compat.cupy` compatibility wrapper. :pr:`29639` by + :user:`Olivier Grisel <ogrisel>`. + Metadata Routing ---------------- diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 745767f23e818..fd08850ce47b3 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -22,12 +22,12 @@ gen_even_slices, ) from ..utils._array_api import ( - _clip, _fill_or_add_to_diagonal, _find_matching_floating_dtype, _is_numpy_namespace, _max_precision_float_dtype, _modify_in_place_if_numpy, + device, get_namespace, get_namespace_and_device, ) @@ -1166,7 +1166,10 @@ def cosine_distances(X, Y=None): S = cosine_similarity(X, Y) S *= -1 S += 1 - S = _clip(S, 0, 2, xp) + # TODO: remove the xp.asarray calls once the following is fixed: + # https://github.com/data-apis/array-api-compat/issues/177 + device_ = device(S) + S = xp.clip(S, xp.asarray(0.0, device=device_), xp.asarray(2.0, device=device_)) if X is Y or Y is None: # Ensure that distances between vectors and themselves are set to 0.0. # This may not be the case due to floating point rounding errors. diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index c5032a10301fd..645f1d61cf527 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -43,7 +43,6 @@ def yield_namespaces(include_numpy_namespaces=True): # array_api_strict.Array instances always have a dummy "device" attribute. 
"array_api_strict", "cupy", - "cupy.array_api", "torch", ]: if not include_numpy_namespaces and array_namespace in _NUMPY_NAMESPACE_NAMES: @@ -242,7 +241,7 @@ def _isdtype_single(dtype, kind, *, xp): elif kind == "real floating": return dtype in supported_float_dtypes(xp) elif kind == "complex floating": - # Some name spaces do not have complex, such as cupy.array_api + # Some name spaces might not have support for complex dtypes. complex_dtypes = set() if hasattr(xp, "complex64"): complex_dtypes.add(xp.complex64) @@ -304,42 +303,6 @@ def ensure_common_namespace_device(reference, *arrays): return arrays -class _ArrayAPIWrapper: - """sklearn specific Array API compatibility wrapper - - This wrapper makes it possible for scikit-learn maintainers to - deal with discrepancies between different implementations of the - Python Array API standard and its evolution over time. - - The Python Array API standard specification: - https://data-apis.org/array-api/latest/ - - Documentation of the NumPy implementation: - https://numpy.org/neps/nep-0047-array-api-standard.html - """ - - def __init__(self, array_namespace): - self._namespace = array_namespace - - def __getattr__(self, name): - return getattr(self._namespace, name) - - def __eq__(self, other): - return self._namespace == other._namespace - - def isdtype(self, dtype, kind): - return isdtype(dtype, kind, xp=self._namespace) - - def maximum(self, x1, x2): - # TODO: Remove when `maximum` is made compatible in `array_api_compat`, - # based on the `2023.12` specification. 
- # https://github.com/data-apis/array-api-compat/issues/127 - x1_np = _convert_to_numpy(x1, xp=self._namespace) - x2_np = _convert_to_numpy(x2, xp=self._namespace) - x_max = numpy.maximum(x1_np, x2_np) - return self._namespace.asarray(x_max, device=device(x1, x2)) - - def _check_device_cpu(device): # noqa if device not in {"cpu", None}: raise ValueError(f"Unsupported device for NumPy: {device!r}") @@ -597,11 +560,6 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None): namespace, is_array_api_compliant = array_api_compat.get_namespace(*arrays), True - # These namespaces need additional wrapping to smooth out small differences - # between implementations - if namespace.__name__ in {"cupy.array_api"}: - namespace = _ArrayAPIWrapper(namespace) - if namespace.__name__ == "array_api_strict" and hasattr( namespace, "set_array_api_strict_flags" ): @@ -827,19 +785,6 @@ def _nanmax(X, axis=None, xp=None): return X -def _clip(S, min_val, max_val, xp): - # TODO: remove this method and change all usage once we move to array api 2023.12 - # https://data-apis.org/array-api/2023.12/API_specification/generated/array_api.clip.html#clip - if _is_numpy_namespace(xp): - return numpy.clip(S, min_val, max_val) - else: - min_arr = xp.asarray(min_val, dtype=S.dtype) - max_arr = xp.asarray(max_val, dtype=S.dtype) - S = xp.where(S < min_arr, min_arr, S) - S = xp.where(S > max_arr, max_arr, S) - return S - - def _asarray_with_order( array, dtype=None, order=None, copy=None, *, xp=None, device=None ): @@ -890,8 +835,6 @@ def _convert_to_numpy(array, xp): if xp_name in {"array_api_compat.torch", "torch"}: return array.cpu().numpy() - elif xp_name == "cupy.array_api": - return array._array.get() elif xp_name in {"array_api_compat.cupy", "cupy"}: # pragma: nocover return array.get() diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index d75ca9e19cdff..1ec38edd20e83 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -1024,7 +1024,7 
@@ def _array_api_for_tests(array_namespace, device): "MPS is not available because the current PyTorch install was not " "built with MPS enabled." ) - elif array_namespace in {"cupy", "cupy.array_api"}: # pragma: nocover + elif array_namespace == "cupy": # pragma: nocover import cupy if cupy.cuda.runtime.getDeviceCount() == 0: diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index 5e3299781a531..8156662d6780d 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -9,7 +9,6 @@ from sklearn._config import config_context from sklearn.base import BaseEstimator from sklearn.utils._array_api import ( - _ArrayAPIWrapper, _asarray_with_order, _atol_for_type, _average, @@ -104,48 +103,6 @@ def mock_getenv(key): xp_out, is_array_api_compliant = get_namespace(X_xp) -class _AdjustableNameAPITestWrapper(_ArrayAPIWrapper): - """API wrapper that has an adjustable name. Used for testing.""" - - def __init__(self, array_namespace, name): - super().__init__(array_namespace=array_namespace) - self.__name__ = name - - -def test_array_api_wrapper_astype(): - """Test _ArrayAPIWrapper for ArrayAPIs that is not NumPy.""" - array_api_strict = pytest.importorskip("array_api_strict") - xp_ = _AdjustableNameAPITestWrapper(array_api_strict, "array_api_strict") - xp = _ArrayAPIWrapper(xp_) - - X = xp.asarray(([[1, 2, 3], [3, 4, 5]]), dtype=xp.float64) - X_converted = xp.astype(X, xp.float32) - assert X_converted.dtype == xp.float32 - - X_converted = xp.asarray(X, dtype=xp.float32) - assert X_converted.dtype == xp.float32 - - -def test_array_api_wrapper_maximum(): - """Test _ArrayAPIWrapper `maximum` for ArrayAPIs other than NumPy. - - This is mainly used to test for `cupy.array_api` but since that is - not available on our coverage-enabled PR CI, we resort to using - `array-api-strict`. 
- """ - array_api_strict = pytest.importorskip("array_api_strict") - xp_ = _AdjustableNameAPITestWrapper(array_api_strict, "array_api_strict") - xp = _ArrayAPIWrapper(xp_) - - x1 = xp.asarray(([[1, 2, 3], [3, 9, 5]]), dtype=xp.int64) - x2 = xp.asarray(([[0, 1, 6], [8, 4, 5]]), dtype=xp.int64) - result = xp.asarray([[1, 2, 6], [8, 9, 5]], dtype=xp.int64) - - x_max = xp.maximum(x1, x2) - assert x_max.dtype == x1.dtype - assert xp.all(xp.equal(x_max, result)) - - @pytest.mark.parametrize("array_api", ["numpy", "array_api_strict"]) def test_asarray_with_order(array_api): """Test _asarray_with_order passes along order for NumPy arrays.""" @@ -158,21 +115,6 @@ def test_asarray_with_order(array_api): assert X_new_np.flags["F_CONTIGUOUS"] -def test_asarray_with_order_ignored(): - """Test _asarray_with_order ignores order for Generic ArrayAPI.""" - xp = pytest.importorskip("array_api_strict") - xp_ = _AdjustableNameAPITestWrapper(xp, "array_api_strict") - - X = numpy.asarray([[1.2, 3.4, 5.1], [3.4, 5.5, 1.2]], order="C") - X = xp_.asarray(X) - - X_new = _asarray_with_order(X, order="F", xp=xp_) - - X_new_np = numpy.asarray(X_new) - assert X_new_np.flags["C_CONTIGUOUS"] - assert not X_new_np.flags["F_CONTIGUOUS"] - - @pytest.mark.parametrize( "array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations() ) @@ -351,8 +293,8 @@ def __init__(self, device_name): assert array1.device == device(array1, array1, array2) -# TODO: add cupy and cupy.array_api to the list of libraries once the -# the following upstream issue has been fixed: +# TODO: add cupy to the list of libraries once the following upstream issue +# has been fixed: # https://github.com/cupy/cupy/issues/8180 @skip_if_array_api_compat_not_configured @pytest.mark.parametrize("library", ["numpy", "array_api_strict", "torch"]) @@ -419,7 +361,7 @@ def test_ravel(namespace, _device, _dtype): @skip_if_array_api_compat_not_configured -@pytest.mark.parametrize("library", ["cupy", "torch", 
"cupy.array_api"]) +@pytest.mark.parametrize("library", ["cupy", "torch"]) def test_convert_to_numpy_gpu(library): # pragma: nocover """Check convert_to_numpy for GPU backed libraries.""" xp = pytest.importorskip(library) @@ -459,7 +401,7 @@ def fit(self, X, y=None): [ ("torch", lambda array: array.cpu().numpy()), ("array_api_strict", lambda array: numpy.asarray(array)), - ("cupy.array_api", lambda array: array._array.get()), + ("cupy", lambda array: array.get()), ], ) def test_convert_estimator_to_ndarray(array_namespace, converter): @@ -500,15 +442,9 @@ def test_reshape_behavior(): xp.reshape(X, -1) -@pytest.mark.parametrize("wrapper", [_ArrayAPIWrapper, _NumPyAPIWrapper]) -def test_get_namespace_array_api_isdtype(wrapper): - """Test isdtype implementation from _ArrayAPIWrapper and _NumPyAPIWrapper.""" - - if wrapper == _ArrayAPIWrapper: - xp_ = pytest.importorskip("array_api_strict") - xp = _ArrayAPIWrapper(xp_) - else: - xp = _NumPyAPIWrapper() +def test_get_namespace_array_api_isdtype(): + """Test isdtype implementation from _NumPyAPIWrapper.""" + xp = _NumPyAPIWrapper() assert xp.isdtype(xp.float32, xp.float32) assert xp.isdtype(xp.float32, "real floating") @@ -533,10 +469,9 @@ def test_get_namespace_array_api_isdtype(wrapper): assert not xp.isdtype(xp.float32, "complex floating") - if wrapper == _NumPyAPIWrapper: - assert not xp.isdtype(xp.int8, "complex floating") - assert xp.isdtype(xp.complex64, "complex floating") - assert xp.isdtype(xp.complex128, "complex floating") + assert not xp.isdtype(xp.int8, "complex floating") + assert xp.isdtype(xp.complex64, "complex floating") + assert xp.isdtype(xp.complex128, "complex floating") with pytest.raises(ValueError, match="Unrecognized data type"): assert xp.isdtype(xp.int16, "unknown") diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 4599f18e7268a..e3216885d17e4 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ 
-2011,10 +2011,9 @@ def test_pandas_array_returns_ndarray(input_values): @skip_if_array_api_compat_not_configured -@pytest.mark.parametrize("array_namespace", ["array_api_strict", "cupy.array_api"]) -def test_check_array_array_api_has_non_finite(array_namespace): +def test_check_array_array_api_has_non_finite(): """Checks that Array API arrays checks non-finite correctly.""" - xp = pytest.importorskip(array_namespace) + xp = pytest.importorskip("array_api_strict") X_nan = xp.asarray([[xp.nan, 1, 0], [0, xp.nan, 3]], dtype=xp.float32) with config_context(array_api_dispatch=True):