ENH Array API support for PCA by mtsokol · Pull Request #26315 · scikit-learn/scikit-learn · GitHub

ENH Array API support for PCA #26315

Merged · 79 commits · Jul 13, 2023

Commits (79)
dd4c9fc
ENH Adds PyTorch support for PCA
mtsokol May 1, 2023
ceb10e3
ENH Support get_precision and get_covariance
mtsokol May 3, 2023
0b8592c
Merge branch 'main' into feature/array_api_compat_pca
mtsokol Jun 16, 2023
2ae83c0
ENH apply review comments
mtsokol Jun 16, 2023
1b4a7cd
Fix multi-fancy indexing by using xp.take on flattened arrays in svd…
ogrisel Jun 16, 2023
cf86c45
Unit test for svd_flip
ogrisel Jun 18, 2023
c84e4ef
Fix stride-related logic in call to xp.take with 1d args
ogrisel Jun 18, 2023
3194b7e
Delete dead code
ogrisel Jun 18, 2023
f1546c4
Run PCA array API tests manually
ogrisel Jun 19, 2023
fd8a217
Do not check concrete values in Array API common test by default
ogrisel Jun 19, 2023
51c9596
Move namespace+parameter generation to _array_api
betatim Jun 19, 2023
8f67f46
Rename function
betatim Jun 19, 2023
e1b7230
Test .score and .score_samples
ogrisel Jun 19, 2023
9c90da9
Progress on fixing .score
ogrisel Jun 20, 2023
697618e
Make accuracy_score and score return float explicitly
ogrisel Jun 20, 2023
5cb75eb
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 20, 2023
a7aba27
Fix docstests and test_check_array_api_input
ogrisel Jun 20, 2023
7b37bca
Fix one more doctest
ogrisel Jun 20, 2023
b457925
Cosmit
ogrisel Jun 20, 2023
76315eb
Fix one more doctest
ogrisel Jun 21, 2023
8291a02
Move and update changelog entry
ogrisel Jun 21, 2023
25e80f4
Update the Array API doc page to mention PCA
ogrisel Jun 21, 2023
73787fe
WIP array api for randomized_svd
ogrisel Jun 21, 2023
3fd48c1
Merge main
ogrisel Jun 22, 2023
f97e1d7
pytest parametrization to run custom Array API checks in test_pca
ogrisel Jun 22, 2023
9afc7dc
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 22, 2023
266e1a9
Pass estimator to check yielder
ogrisel Jun 22, 2023
e1272e2
Update sklearn/decomposition/_base.py
ogrisel Jun 22, 2023
0916c85
Fix check_array_api_input and update PCA accordingly
ogrisel Jun 25, 2023
41fbc6a
Compare namespace names
ogrisel Jun 25, 2023
b50542e
Keep on using scipy.linalg.svd in PCA by default
ogrisel Jun 25, 2023
51b348a
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 25, 2023
f3e6ebf
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 26, 2023
e044dc8
cosmetics
ogrisel Jun 26, 2023
e1c1474
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 27, 2023
6586822
Test errors when calling PCA with unsupported parameter values
ogrisel Jun 28, 2023
e232837
Protect array_api test against missing soft dependency
ogrisel Jun 28, 2023
764e246
More consistent use of scipy.linalg when array api is disabled
ogrisel Jun 28, 2023
a21bac4
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 28, 2023
efe75a0
Improve coverage and simplify code
ogrisel Jun 28, 2023
396752b
Comment about lazy evaluation
ogrisel Jun 29, 2023
3396e46
Simplify power iteration by using @ instead
ogrisel Jun 29, 2023
dfe0b55
Update sklearn/utils/_array_api.py
ogrisel Jun 29, 2023
713f88e
Clean-up left over
ogrisel Jun 29, 2023
3bf6e50
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 29, 2023
6920561
Fix randomized_range_finder docstring to reflect the latest version o…
ogrisel Jun 29, 2023
6ff1ab1
Better not use np.newaxis in Array API code
ogrisel Jun 29, 2023
60e307a
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jun 30, 2023
c182fbc
Preserve device info in svd_flip
ogrisel Jul 1, 2023
c72f586
Update array-api-compat version in build_tools/azure/pylatest_conda_f…
ogrisel Jul 3, 2023
dcea203
Fix xp.take in sklearn.linear_model by passing axis argument to fit a…
ogrisel Jul 3, 2023
5f38eff
Fix LinearDiscriminantAnalysis.score to work with cupy
ogrisel Jul 3, 2023
41164e9
Test get_covariance / get_precision
ogrisel Jul 3, 2023
8193209
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jul 3, 2023
42524bd
Typo
ogrisel Jul 3, 2023
b45547d
Add comment to explain why we keep the scipy linalg.svd code path for…
ogrisel Jul 5, 2023
ee7ab50
Use array_api_compat.to_device
ogrisel Jul 5, 2023
1eb368d
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jul 5, 2023
acdefc0
Improve the to_device helper (with missing docstring)
ogrisel Jul 5, 2023
6c355e6
Use np.newaxis instead of None
ogrisel Jul 5, 2023
5a53dc6
Use _is_numpy_namespace / xp.asarray for all numpy backed xp values
ogrisel Jul 5, 2023
e1cf17d
Apply suggestions from code review
ogrisel Jul 5, 2023
1d7b802
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jul 5, 2023
4dd424b
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jul 6, 2023
2270b10
Revert no longer needed change to parametrize_with_checks
ogrisel Jul 6, 2023
841d4dc
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jul 6, 2023
20e0ecf
Leverage broadcasting to spare a temp allocation
ogrisel Jul 10, 2023
47091b8
Remove changelog merge typo
ogrisel Jul 10, 2023
cb5c03f
Keep on using scipy.linalg.inv in PCA.get_covariance by default
ogrisel Jul 10, 2023
4093a6c
Add note on combined dtype conversion and device move
ogrisel Jul 10, 2023
2216276
Spare one more temporary allocation.
ogrisel Jul 10, 2023
5032d1f
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jul 10, 2023
5abc2bf
Assume to_device is always called under an is_array_api_compliant con…
ogrisel Jul 10, 2023
e30bfa8
Extend the common tests to handle the case when array api is used wit…
ogrisel Jul 11, 2023
bb564bb
Extend the common tests to handle the case when array api is used wit…
ogrisel Jul 11, 2023
3e28c50
Simplify condition to protect scipy.linalg.svd in randomized_svd
ogrisel Jul 12, 2023
ea4fc2e
Merge branch 'main' into feature/array_api_compat_pca
ogrisel Jul 12, 2023
b881ff1
Use xp.asarray + device instead of a new to_device helper
ogrisel Jul 13, 2023
aa9a33a
Fix randomized_range_finder with sparse matrices
ogrisel Jul 13, 2023
79 changes: 40 additions & 39 deletions build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion doc/modules/array_api.rst
@@ -88,6 +88,8 @@ the tensors directly::
Estimators with support for `Array API`-compatible inputs
=========================================================

- :class:`decomposition.PCA` (with `svd_solver="full"`,
`svd_solver="randomized"` and `power_iteration_normalizer="QR"`)
- :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`)

Coverage for more estimators is expected to grow over time. Please follow the
@@ -107,4 +109,4 @@ To run these checks you need to install
test environment. To run the full set of checks you need to install both
`PyTorch <https://pytorch.org/>`_ and `CuPy <https://cupy.dev/>`_ and have
a GPU. Checks that can not be executed or have missing dependencies will be
automatically skipped.
automatically skipped.
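
For illustration, a minimal sketch of what the newly added support enables (assuming the `array-api-compat` soft dependency is installed and Array API dispatch is turned on via `sklearn.set_config`; only the solver combinations listed above are covered)::

    import sklearn
    import torch

    from sklearn.decomposition import PCA

    sklearn.set_config(array_api_dispatch=True)

    X = torch.rand(100, 10)
    # Only svd_solver="full" and svd_solver="randomized" (with
    # power_iteration_normalizer="QR") go through the Array API code path.
    pca = PCA(n_components=2, svd_solver="full").fit(X)

    # Fitted attributes stay in the input namespace and on the input device.
    print(type(pca.components_))  # <class 'torch.Tensor'>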
6 changes: 3 additions & 3 deletions doc/modules/model_evaluation.rst
@@ -451,7 +451,7 @@ where :math:`1(x)` is the `indicator function
>>> accuracy_score(y_true, y_pred)
0.5
>>> accuracy_score(y_true, y_pred, normalize=False)
2
2.0
Member comment:
This is impacted because we now call float(...) explicitly on the output of accuracy_score and zero_one_loss to return a Python scalar instead of a 0-dim numpy value whose dtype can vary in hard-to-predict ways depending on the inputs.

Member comment:
Note: the docstring of accuracy_score specifies that the return type is float, so this might be considered a bugfix. Not sure if we need a changelog entry for this.

Member comment:
I guess no one has noticed this difference till now because the 0-dim numpy array is mostly indistinguishable from a float?

Member comment:
Yes, unless it has an int dtype, as was the case in this particular code snippet prior to this PR :)
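
To make the behavior change concrete, a quick doctest-style check on toy data where two of the four predictions are correct (before this PR both calls returned 0-dim NumPy values with an integer dtype)::

    >>> from sklearn.metrics import accuracy_score, zero_one_loss
    >>> y_true, y_pred = [0, 1, 2, 3], [0, 2, 1, 3]
    >>> accuracy_score(y_true, y_pred, normalize=False)
    2.0
    >>> zero_one_loss(y_true, y_pred, normalize=False)
    2.0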


In the multilabel case with binary label indicators::

@@ -1696,7 +1696,7 @@ loss can also be computed as :math:`zero-one loss = 1 - accuracy`.
>>> zero_one_loss(y_true, y_pred)
0.25
>>> zero_one_loss(y_true, y_pred, normalize=False)
1
1.0

In the multilabel case with binary label indicators, where the first label
set [0,1] has an error::
@@ -1705,7 +1705,7 @@ set [0,1] has an error::
0.5

>>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)), normalize=False)
1
1.0

.. topic:: Example:

7 changes: 7 additions & 0 deletions doc/whats_new/v1.4.rst
@@ -43,6 +43,7 @@ Changelog
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
where 123456 is the *pull request* number, not the issue number.


:mod:`sklearn.base`
...................

@@ -61,6 +62,12 @@ Changelog
from `None` to `auto` in version 1.6.
:pr:`26634` by :user:`Alexandre Landeau <AlexL>` and :user:`Alexandre Vigny <avigny>`.

- |Enhancement| :class:`decomposition.PCA` now supports the Array API for the
`full` and `randomized` solvers (with QR power iterations). See
:ref:`array_api` for more details.
:pr:`26315` by :user:`Mateusz Sokół <mtsokol>` and
:user:`Olivier Grisel <ogrisel>`.

:mod:`sklearn.ensemble`
.......................

63 changes: 40 additions & 23 deletions sklearn/decomposition/_base.py
@@ -14,6 +14,7 @@
from scipy import linalg

from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin
from ..utils._array_api import _add_to_diagonal, get_namespace
from ..utils.validation import check_is_fitted


@@ -38,13 +39,18 @@ def get_covariance(self):
cov : array of shape=(n_features, n_features)
Estimated covariance of data.
"""
xp, _ = get_namespace(self.components_)

components_ = self.components_
exp_var = self.explained_variance_
if self.whiten:
components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)
cov = np.dot(components_.T * exp_var_diff, components_)
cov.flat[:: len(cov) + 1] += self.noise_variance_ # modify diag inplace
components_ = components_ * xp.sqrt(exp_var[:, np.newaxis])
exp_var_diff = exp_var - self.noise_variance_
exp_var_diff = xp.where(
exp_var > self.noise_variance_, exp_var_diff, xp.asarray(0.0)
)
cov = (components_.T * exp_var_diff) @ components_
_add_to_diagonal(cov, self.noise_variance_, xp)
return cov
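
The Array API spec has no `ndarray.flat`, so the `cov.flat[:: len(cov) + 1] += ...` idiom above is replaced by the `_add_to_diagonal` helper imported at the top of the file. A rough sketch of the semantics such a helper has to provide (an illustration only, not the actual implementation in `sklearn/utils/_array_api.py`):

    def _add_to_diagonal(array, value, xp):
        """Add `value` (scalar or 1d of length n) to the diagonal of a
        square 2d `array`, modifying it in place."""
        value = xp.asarray(value, dtype=array.dtype)
        for i in range(array.shape[0]):
            array[i, i] += value if value.ndim == 0 else value[i]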

def get_precision(self):
@@ -58,26 +64,36 @@ def get_precision(self):
precision : array, shape=(n_features, n_features)
Estimated precision of data.
"""
xp, is_array_api_compliant = get_namespace(self.components_)

n_features = self.components_.shape[1]

# handle corner cases first
if self.n_components_ == 0:
return np.eye(n_features) / self.noise_variance_
return xp.eye(n_features) / self.noise_variance_

if is_array_api_compliant:
linalg_inv = xp.linalg.inv
else:
linalg_inv = linalg.inv

if np.isclose(self.noise_variance_, 0.0, atol=0.0):
return linalg.inv(self.get_covariance())
if self.noise_variance_ == 0.0:
return linalg_inv(self.get_covariance())

# Get precision using matrix inversion lemma
components_ = self.components_
exp_var = self.explained_variance_
if self.whiten:
components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)
precision = np.dot(components_, components_.T) / self.noise_variance_
precision.flat[:: len(precision) + 1] += 1.0 / exp_var_diff
precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_))
components_ = components_ * xp.sqrt(exp_var[:, np.newaxis])
exp_var_diff = exp_var - self.noise_variance_
exp_var_diff = xp.where(
exp_var > self.noise_variance_, exp_var_diff, xp.asarray(0.0)
)
precision = components_ @ components_.T / self.noise_variance_
_add_to_diagonal(precision, 1.0 / exp_var_diff, xp)
precision = components_.T @ linalg_inv(precision) @ components_
precision /= -(self.noise_variance_**2)
precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_
_add_to_diagonal(precision, 1.0 / self.noise_variance_, xp)
return precision
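
For reference, the "matrix inversion lemma" mentioned in the comment above is the Woodbury identity. With `W = components_` (shape `(k, d)`), `v = exp_var_diff` and `s2 = noise_variance_`, the covariance is `C = W.T @ (v[:, None] * W) + s2 * I`, and the identity gives

    inv(C) = I / s2 - (W.T @ inv(diag(1 / v) + W @ W.T / s2) @ W) / s2**2

so only a `(k, k)` matrix is inverted rather than the `(d, d)` covariance; the statements above compute exactly these terms.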

@abstractmethod
@@ -116,14 +132,16 @@ def transform(self, X):
Projection of X in the first principal components, where `n_samples`
is the number of samples and `n_components` is the number of the components.
"""
xp, _ = get_namespace(X)

check_is_fitted(self)

X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)
X = self._validate_data(X, dtype=[xp.float64, xp.float32], reset=False)
if self.mean_ is not None:
X = X - self.mean_
X_transformed = np.dot(X, self.components_.T)
X_transformed = X @ self.components_.T
if self.whiten:
X_transformed /= np.sqrt(self.explained_variance_)
X_transformed /= xp.sqrt(self.explained_variance_)
return X_transformed

def inverse_transform(self, X):
@@ -148,16 +166,15 @@ def inverse_transform(self, X):
If whitening is enabled, inverse_transform will compute the
exact inverse operation, which includes reversing whitening.
"""
xp, _ = get_namespace(X)

if self.whiten:
return (
np.dot(
X,
np.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_,
)
+ self.mean_
scaled_components = (
xp.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_
)
return X @ scaled_components + self.mean_
else:
return np.dot(X, self.components_) + self.mean_
return X @ self.components_ + self.mean_

@property
def _n_features_out(self):
65 changes: 44 additions & 21 deletions sklearn/decomposition/_pca.py
@@ -22,6 +22,7 @@
from ..base import _fit_context
from ..utils import check_random_state
from ..utils._arpack import _init_arpack_v0
from ..utils._array_api import get_namespace
from ..utils._param_validation import Interval, RealNotInt, StrOptions
from ..utils.deprecation import deprecated
from ..utils.extmath import fast_logdet, randomized_svd, stable_cumsum, svd_flip
@@ -108,8 +109,10 @@ def _infer_dimension(spectrum, n_samples):

The returned value will be in [1, n_features - 1].
"""
ll = np.empty_like(spectrum)
ll[0] = -np.inf # we don't want to return n_components = 0
xp, _ = get_namespace(spectrum)

ll = xp.empty_like(spectrum)
ll[0] = -xp.inf # we don't want to return n_components = 0
for rank in range(1, spectrum.shape[0]):
ll[rank] = _assess_dimension(spectrum, rank, n_samples)
return ll.argmax()
@@ -471,6 +474,7 @@ def fit_transform(self, X, y=None):

def _fit(self, X):
"""Dispatch to the right submethod depending on the chosen solver."""
xp, is_array_api_compliant = get_namespace(X)

# Raise an error for sparse input.
# This is more informative than the generic one raised by check_array.
@@ -479,9 +483,14 @@
"PCA does not support sparse input. See "
"TruncatedSVD for a possible alternative."
)
# Raise an error for non-Numpy input and arpack solver.
if self.svd_solver == "arpack" and is_array_api_compliant:
raise ValueError(
"PCA with svd_solver='arpack' is not supported for Array API inputs."
)

X = self._validate_data(
X, dtype=[np.float64, np.float32], ensure_2d=True, copy=self.copy
X, dtype=[xp.float64, xp.float32], ensure_2d=True, copy=self.copy
)

# Handle n_components==None
@@ -513,6 +522,8 @@

def _fit_full(self, X, n_components):
"""Fit the model by computing full SVD on X."""
xp, is_array_api_compliant = get_namespace(X)

n_samples, n_features = X.shape

if n_components == "mle":
@@ -528,20 +539,30 @@
)

# Center data
self.mean_ = np.mean(X, axis=0)
self.mean_ = xp.mean(X, axis=0)
X -= self.mean_

U, S, Vt = linalg.svd(X, full_matrices=False)
if not is_array_api_compliant:
# Use scipy.linalg with NumPy/SciPy inputs for the sake of not
# introducing unanticipated behavior changes. In the long run we
# could instead decide to always use xp.linalg.svd for all inputs,
# but that would make this code rely on numpy's SVD instead of
# scipy's. It's not 100% clear whether they use the same LAPACK
# solver by default though (assuming both are built against the
# same BLAS).
U, S, Vt = linalg.svd(X, full_matrices=False)
else:
U, S, Vt = xp.linalg.svd(X, full_matrices=False)
# flip eigenvectors' sign to enforce deterministic output
U, Vt = svd_flip(U, Vt)
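# An SVD is only unique up to the signs of each pair of singular vectors,
# and different LAPACK / Array API backends may return either sign, hence
# the explicit flip above to keep results reproducible.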

components_ = Vt

# Get variance explained by singular values
explained_variance_ = (S**2) / (n_samples - 1)
total_var = explained_variance_.sum()
total_var = xp.sum(explained_variance_)
explained_variance_ratio_ = explained_variance_ / total_var
singular_values_ = S.copy() # Store the singular values.
singular_values_ = xp.asarray(S, copy=True) # Store the singular values.

# Postprocess the number of components required
if n_components == "mle":
@@ -553,16 +574,16 @@
# their variance is always greater than n_components float
# passed. More discussion in issue: #15669
ratio_cumsum = stable_cumsum(explained_variance_ratio_)
n_components = np.searchsorted(ratio_cumsum, n_components, side="right") + 1
n_components = xp.searchsorted(ratio_cumsum, n_components, side="right") + 1
# Compute noise covariance using Probabilistic PCA model
# The sigma2 maximum likelihood (cf. eq. 12.46)
if n_components < min(n_features, n_samples):
self.noise_variance_ = explained_variance_[n_components:].mean()
self.noise_variance_ = xp.mean(explained_variance_[n_components:])
else:
self.noise_variance_ = 0.0

self.n_samples_ = n_samples
self.components_ = components_[:n_components]
self.components_ = components_[:n_components, :]
self.n_components_ = n_components
self.explained_variance_ = explained_variance_[:n_components]
self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]
@@ -574,6 +595,8 @@ def _fit_truncated(self, X, n_components, svd_solver):
"""Fit the model by computing truncated SVD (by ARPACK or randomized)
on X.
"""
xp, _ = get_namespace(X)

n_samples, n_features = X.shape

if isinstance(n_components, str):
@@ -599,7 +622,7 @@
random_state = check_random_state(self.random_state)

# Center data
self.mean_ = np.mean(X, axis=0)
self.mean_ = xp.mean(X, axis=0)
X -= self.mean_

if svd_solver == "arpack":
@@ -633,15 +656,14 @@
# Workaround in-place variance calculation since at the time numpy
# did not have a way to calculate variance in-place.
N = X.shape[0] - 1
np.square(X, out=X)
np.sum(X, axis=0, out=X[0])
total_var = (X[0] / N).sum()
X **= 2
total_var = xp.sum(xp.sum(X, axis=0) / N)
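# With X centered, this equals X.var(axis=0, ddof=1).sum(); squaring X
# in place spares allocating a second array of X's size.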

self.explained_variance_ratio_ = self.explained_variance_ / total_var
self.singular_values_ = S.copy() # Store the singular values.
self.singular_values_ = xp.asarray(S, copy=True) # Store the singular values.

if self.n_components_ < min(n_features, n_samples):
self.noise_variance_ = total_var - self.explained_variance_.sum()
self.noise_variance_ = total_var - xp.sum(self.explained_variance_)
self.noise_variance_ /= min(n_features, n_samples) - n_components
else:
self.noise_variance_ = 0.0
@@ -666,12 +688,12 @@ def score_samples(self, X):
Log-likelihood of each sample under the current model.
"""
check_is_fitted(self)

X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)
xp, _ = get_namespace(X)
X = self._validate_data(X, dtype=[xp.float64, xp.float32], reset=False)
Xr = X - self.mean_
n_features = X.shape[1]
precision = self.get_precision()
log_like = -0.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
log_like = -0.5 * xp.sum(Xr * (Xr @ precision), axis=1)
log_like -= 0.5 * (n_features * log(2.0 * np.pi) - fast_logdet(precision))
return log_like

@@ -695,7 +717,8 @@ def score(self, X, y=None):
ll : float
Average log-likelihood of the samples under the current model.
"""
return np.mean(self.score_samples(X))
xp, _ = get_namespace(X)
return float(xp.mean(self.score_samples(X)))

def _more_tags(self):
return {"preserves_dtype": [np.float64, np.float32]}
return {"preserves_dtype": [np.float64, np.float32], "array_api_support": True}