FEA Add array API support for GaussianMixture by lesteve · Pull Request #30777 · scikit-learn/scikit-learn
FEA Add array API support for GaussianMixture #30777


Merged
merged 103 commits into scikit-learn:main from lesteve:gmm-array-api
Jun 19, 2025

Commits (103)
b04a9f7
wip
lesteve Jan 22, 2025
e6ba4e4
wip
lesteve Jan 22, 2025
2226a55
stuck on linalg.cholesky array API support
lesteve Feb 5, 2025
b1fdee7
a bit further with xp.cholesky but now linalg.solve_triangular
lesteve Feb 5, 2025
14fb0ba
more array api
StefanieSenger Feb 14, 2025
6010ff7
wip (problem with weights as numpy arrays)
lesteve Feb 19, 2025
aa2a383
array api for covariance_type='diag' and init_params='random'
StefanieSenger Feb 21, 2025
de4f3a5
add simple test
StefanieSenger Feb 21, 2025
7974931
Add comments about tricky bits
lesteve Feb 21, 2025
08e5f9b
lint
lesteve Feb 21, 2025
0f525ef
one more comment
lesteve Feb 21, 2025
4801e2b
revert unwanted change
lesteve Feb 28, 2025
de1343c
fix test_bayesian_mixture
lesteve Feb 28, 2025
b05eca0
Compare to numpy result in test
lesteve Feb 28, 2025
c35bdd6
Use global_random_seed
lesteve Feb 28, 2025
4516920
retrigger CI
StefanieSenger Mar 12, 2025
61c8b5d
Merge branch 'gmm-array-api' of github.com:lesteve/scikit-learn into …
StefanieSenger Mar 12, 2025
e974051
retrigger CI
StefanieSenger Mar 12, 2025
1a7f262
retrigger CI [azure parallel]
StefanieSenger Mar 12, 2025
fb40870
A bit further with setting the device more correctly
lesteve Mar 13, 2025
f2eba56
Add our own implementation of logsumexp [azure parallel]
lesteve Mar 14, 2025
a0f8d25
Fix implementation of logsumexp
lesteve Mar 14, 2025
53e9917
Fix for older numpy versions
lesteve Mar 14, 2025
ac66a02
[azure parallel] Add changelog template
lesteve Mar 15, 2025
b3c1c8b
Merge branch 'main' into gmm-array-api
ogrisel Mar 18, 2025
dfa92d9
Remove "# noqa" inline comment
ogrisel Mar 18, 2025
5f440a9
add test for _logsumexp
StefanieSenger Mar 19, 2025
dd59446
slightly improve tests
StefanieSenger Mar 19, 2025
9e93dfa
improve device checking
StefanieSenger Mar 19, 2025
76cf0fa
tweak
lesteve Mar 21, 2025
489c3e3
Pass xp along the call chain
lesteve Mar 21, 2025
6dccb47
tweak
lesteve Mar 21, 2025
3bbb2fc
Merge branch 'main' of https://github.com/scikit-learn/scikit-learn i…
lesteve Mar 25, 2025
30894cd
add NotImplementedError and test
StefanieSenger Mar 26, 2025
ae06fe1
add array api support for init_params='random_from_data'
StefanieSenger Mar 26, 2025
3f2d928
Fix?
lesteve Mar 26, 2025
6be6aa2
Add a sumlogexp test without nans or +inf
lesteve Mar 27, 2025
805742b
tweak
lesteve Mar 27, 2025
90bf491
Add test for logsumexp on default device with array API dispatch disa…
lesteve Mar 27, 2025
b07b171
Cleaner way to skip when array API dispatch is disabled
lesteve Mar 27, 2025
baf6982
[azure parallel]
lesteve Mar 27, 2025
778763f
Merge branch 'main' of https://github.com/scikit-learn/scikit-learn i…
lesteve Mar 28, 2025
c7e909a
Merge branch 'main' into gmm-array-api
lesteve Apr 1, 2025
58ad0fe
Merge branch 'main' into gmm-array-api
lesteve Apr 1, 2025
339c16b
add support for weights_init
StefanieSenger Apr 2, 2025
cbc8811
fix signature and add assert to test
StefanieSenger Apr 2, 2025
614f7b5
some small things
StefanieSenger Apr 3, 2025
90baf84
Fix BayesianGaussianMixture
lesteve Apr 3, 2025
1e7a385
Add comment
lesteve Apr 3, 2025
e4618cf
Remove all remaining code using np and make most tests pass
lesteve Apr 3, 2025
2b80ac9
Fix easy failures
lesteve Apr 3, 2025
3287a50
Fix [azure parallel]
lesteve Apr 3, 2025
fb72f79
array api support for covariance type 'full' + test
StefanieSenger Apr 4, 2025
9641997
fix support for covariance_type='spherical'
StefanieSenger Apr 7, 2025
35a4644
add test for GaussianMixture.sample()
StefanieSenger Apr 7, 2025
502d3e6
fix array api support in sample() with covariance_type='full'
StefanieSenger Apr 7, 2025
148381d
fix array api support in sample() with other covariance_types for arr…
StefanieSenger Apr 7, 2025
d565cf9
fix torch dtype issue in xp.full
StefanieSenger Apr 7, 2025
c836e8d
use numpy for random generation in sample
StefanieSenger Apr 9, 2025
668c1b0
remove old comment
StefanieSenger Apr 9, 2025
7fef10a
Only use np.errstate for numpy namespace
lesteve Apr 9, 2025
c9a355d
Use int64 to be closer to previous code that was doing dtype=int
lesteve Apr 9, 2025
a712181
colons instead of ellipsis
StefanieSenger May 7, 2025
038632f
revert changes in k-means initialisation
StefanieSenger May 7, 2025
18b3fe0
add smoke test for other methods
StefanieSenger May 7, 2025
8f00364
add lacking check_is_fitted to BaseMixture.score
StefanieSenger May 7, 2025
cc8fa42
Merge branch 'main' into gmm-array-api
StefanieSenger May 7, 2025
3aaabf5
re-trigger CI
StefanieSenger May 9, 2025
c9b2088
Merge branch 'main' into gmm-array-api
lesteve May 9, 2025
0084640
Add torch import
lesteve May 9, 2025
f9b2946
different branch for numpy.linalg; only re-raise numpy error
StefanieSenger May 14, 2025
7a38674
Merge branch 'gmm-array-api' of github.com:lesteve/scikit-learn into …
StefanieSenger May 14, 2025
adc992e
Remove comment
lesteve May 14, 2025
0bb750c
Remove script
lesteve May 15, 2025
7874231
update TODOs
lesteve May 15, 2025
96d8d8c
only use X array namespace at prediction time
lesteve May 15, 2025
27a8cd2
Fix predict
lesteve May 15, 2025
4c62715
remove TODO
lesteve May 15, 2025
303f392
Fix
lesteve May 15, 2025
c232e39
Better variable name
lesteve May 16, 2025
a43eeb2
Simplify with math.log
lesteve May 16, 2025
3a72ec9
Use math.pi
lesteve May 16, 2025
8f4079f
Improve tests + make score return float
lesteve May 16, 2025
de1e575
List GaussianMixture in the estimators supporting array API
lesteve May 16, 2025
3a7dfd1
Merge branch 'main' of https://github.com/scikit-learn/scikit-learn i…
lesteve May 20, 2025
910aa1f
Remove temporary array-api-compat work-around
lesteve May 20, 2025
23b543d
Merge branch 'main' into gmm-array-api
StefanieSenger Jun 4, 2025
4fe3766
lint
lesteve Jun 6, 2025
ce214a6
Revert changes to test_bayesian_mixture.py
lesteve Jun 13, 2025
a69cd62
Remove unnecessary check_is_fitted
lesteve Jun 13, 2025
1a0e33b
Add all array constructor params to test
lesteve Jun 13, 2025
1dca29a
[azure parallel] tweak docstring
lesteve Jun 13, 2025
b990682
Update sklearn/utils/_array_api.py
OmarManzoor Jun 14, 2025
72cd185
Remove commented out test
lesteve Jun 16, 2025
3af1470
Handle comments
lesteve Jun 16, 2025
ecac610
use _call_cholesky
lesteve Jun 16, 2025
341b659
More explicit use of scipy.linalg
lesteve Jun 18, 2025
7ffc5c7
[azure parallel] Increase rtol for float32 tests + some minor cleanups
lesteve Jun 18, 2025
3b95a5f
rename variables
lesteve Jun 18, 2025
45ba1ee
[azure parallel] test more precisely when array constructor arguments…
lesteve Jun 18, 2025
4f89101
[azure parallel] Remove debug
lesteve Jun 18, 2025
d2ca209
Test more attributes
lesteve Jun 19, 2025
d46840b
Increase tol to make tests pass
lesteve Jun 19, 2025
2 changes: 2 additions & 0 deletions doc/modules/array_api.rst
@@ -117,6 +117,8 @@ Estimators
- :class:`preprocessing.MaxAbsScaler`
- :class:`preprocessing.MinMaxScaler`
- :class:`preprocessing.Normalizer`
- :class:`mixture.GaussianMixture` (with `init_params="random"` or
`init_params="random_from_data"` and `warm_start=False`)

Meta-estimators
---------------
4 changes: 4 additions & 0 deletions doc/whats_new/upcoming_changes/array-api/30777.feature.rst
@@ -0,0 +1,4 @@
- :class:`sklearn.mixture.GaussianMixture` with
`init_params="random"` or `init_params="random_from_data"` and
`warm_start=False` now supports Array API compatible inputs.
By :user:`Stefanie Senger <StefanieSenger>` and :user:`Loïc Estève <lesteve>`
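
A quick sketch of what this feature enables. This is not part of the diff; it assumes PyTorch is installed along with scikit-learn's array API requirements, and it relies on the documented `array_api_dispatch` config flag plus the `init_params`/`warm_start` constraints listed in the changelog above:

```python
import torch

import sklearn
from sklearn.mixture import GaussianMixture

sklearn.set_config(array_api_dispatch=True)

X = torch.randn(500, 3)  # any array API compatible input

gmm = GaussianMixture(n_components=2, init_params="random", random_state=0)
gmm.fit(X)

# Fitted attributes and predictions stay in the input's namespace (torch here).
print(type(gmm.means_))
print(gmm.predict(X[:5]))
```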
131 changes: 86 additions & 45 deletions sklearn/mixture/_base.py
@@ -5,17 +5,24 @@

import warnings
from abc import ABCMeta, abstractmethod
from contextlib import nullcontext
from numbers import Integral, Real
from time import time

import numpy as np
from scipy.special import logsumexp

from .. import cluster
from ..base import BaseEstimator, DensityMixin, _fit_context
from ..cluster import kmeans_plusplus
from ..exceptions import ConvergenceWarning
from ..utils import check_random_state
from ..utils._array_api import (
_convert_to_numpy,
_is_numpy_namespace,
_logsumexp,
get_namespace,
get_namespace_and_device,
)
from ..utils._param_validation import Interval, StrOptions
from ..utils.validation import check_is_fitted, validate_data

@@ -31,7 +38,6 @@ def _check_shape(param, param_shape, name):

name : str
"""
param = np.array(param)
if param.shape != param_shape:
raise ValueError(
"The parameter '%s' should have the shape of %s, but got %s"
@@ -86,7 +92,7 @@ def __init__(
self.verbose_interval = verbose_interval

@abstractmethod
def _check_parameters(self, X):
def _check_parameters(self, X, xp=None):
"""Check initial parameters of the derived class.

Parameters
@@ -95,7 +101,7 @@ def _check_parameters(self, X):
"""
pass

def _initialize_parameters(self, X, random_state):
def _initialize_parameters(self, X, random_state, xp=None):
"""Initialize the model parameters.

Parameters
@@ -106,6 +112,7 @@ def _initialize_parameters(self, X, random_state):
A random number generator instance that controls the random seed
used for the method chosen to initialize the parameters.
"""
xp, _, device = get_namespace_and_device(X, xp=xp)
n_samples, _ = X.shape

if self.init_params == "kmeans":
@@ -119,16 +126,25 @@
)
resp[np.arange(n_samples), label] = 1
elif self.init_params == "random":
resp = np.asarray(
random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype
resp = xp.asarray(
random_state.uniform(size=(n_samples, self.n_components)),
dtype=X.dtype,
device=device,
)
resp /= resp.sum(axis=1)[:, np.newaxis]
resp /= xp.sum(resp, axis=1)[:, xp.newaxis]
elif self.init_params == "random_from_data":
resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
resp = xp.zeros(
(n_samples, self.n_components), dtype=X.dtype, device=device
)
indices = random_state.choice(
n_samples, size=self.n_components, replace=False
)
resp[indices, np.arange(self.n_components)] = 1
# TODO: when array API supports __setitem__ with fancy indexing we
# can use the previous code:
# resp[indices, xp.arange(self.n_components)] = 1
# Until then we use a for loop on one dimension.
for col, index in enumerate(indices):
resp[index, col] = 1
elif self.init_params == "k-means++":
resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
_, indices = kmeans_plusplus(
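
The for loop introduced in `_initialize_parameters` above works around a gap in the array API standard: `__setitem__` with a pair of integer arrays (fancy indexing) is not guaranteed, as the TODO comment notes. A standalone sketch of the equivalence, using NumPy purely for illustration:

```python
import numpy as np

n_samples, n_components = 6, 3
indices = np.asarray([4, 0, 2])  # one chosen sample per component

resp = np.zeros((n_samples, n_components))
# NumPy-only shortcut that the array API standard does not guarantee:
#   resp[indices, np.arange(n_components)] = 1
# Portable version: one-hot assignment, one column at a time.
for col, index in enumerate(indices):
    resp[index, col] = 1
```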
@@ -210,20 +226,21 @@ def fit_predict(self, X, y=None):
labels : array, shape (n_samples,)
Component labels.
"""
X = validate_data(self, X, dtype=[np.float64, np.float32], ensure_min_samples=2)
xp, _ = get_namespace(X)
X = validate_data(self, X, dtype=[xp.float64, xp.float32], ensure_min_samples=2)
if X.shape[0] < self.n_components:
raise ValueError(
"Expected n_samples >= n_components "
f"but got n_components = {self.n_components}, "
f"n_samples = {X.shape[0]}"
)
self._check_parameters(X)
self._check_parameters(X, xp=xp)

# if we enable warm_start, we will have a unique initialisation
do_init = not (self.warm_start and hasattr(self, "converged_"))
n_init = self.n_init if do_init else 1

max_lower_bound = -np.inf
max_lower_bound = -xp.inf
best_lower_bounds = []
self.converged_ = False

@@ -234,9 +251,9 @@
self._print_verbose_msg_init_beg(init)

if do_init:
self._initialize_parameters(X, random_state)
self._initialize_parameters(X, random_state, xp=xp)

lower_bound = -np.inf if do_init else self.lower_bound_
lower_bound = -xp.inf if do_init else self.lower_bound_
current_lower_bounds = []

if self.max_iter == 0:
@@ -247,8 +264,8 @@
for n_iter in range(1, self.max_iter + 1):
prev_lower_bound = lower_bound

log_prob_norm, log_resp = self._e_step(X)
self._m_step(X, log_resp)
log_prob_norm, log_resp = self._e_step(X, xp=xp)
self._m_step(X, log_resp, xp=xp)
lower_bound = self._compute_lower_bound(log_resp, log_prob_norm)
current_lower_bounds.append(lower_bound)

@@ -261,7 +278,7 @@

self._print_verbose_msg_init_end(lower_bound, converged)

if lower_bound > max_lower_bound or max_lower_bound == -np.inf:
if lower_bound > max_lower_bound or max_lower_bound == -xp.inf:
max_lower_bound = lower_bound
best_params = self._get_parameters()
best_n_iter = n_iter
@@ -281,19 +298,19 @@
ConvergenceWarning,
)

self._set_parameters(best_params)
self._set_parameters(best_params, xp=xp)
self.n_iter_ = best_n_iter
self.lower_bound_ = max_lower_bound
self.lower_bounds_ = best_lower_bounds

# Always do a final e-step to guarantee that the labels returned by
# fit_predict(X) are always consistent with fit(X).predict(X)
# for any value of max_iter and tol (and any random_state).
_, log_resp = self._e_step(X)
_, log_resp = self._e_step(X, xp=xp)

return log_resp.argmax(axis=1)
return xp.argmax(log_resp, axis=1)

def _e_step(self, X):
def _e_step(self, X, xp=None):
"""E step.

Parameters
@@ -309,8 +326,9 @@
Logarithm of the posterior probabilities (or responsibilities) of
the point of each sample in X.
"""
log_prob_norm, log_resp = self._estimate_log_prob_resp(X)
return np.mean(log_prob_norm), log_resp
xp, _ = get_namespace(X, xp=xp)
log_prob_norm, log_resp = self._estimate_log_prob_resp(X, xp=xp)
return xp.mean(log_prob_norm), log_resp

@abstractmethod
def _m_step(self, X, log_resp):
@@ -351,7 +369,7 @@ def score_samples(self, X):
check_is_fitted(self)
X = validate_data(self, X, reset=False)

return logsumexp(self._estimate_weighted_log_prob(X), axis=1)
return _logsumexp(self._estimate_weighted_log_prob(X), axis=1)
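
`_logsumexp` replaces `scipy.special.logsumexp`, which only accepts NumPy inputs. A minimal sketch of the max-shift trick behind it, with a hypothetical name (the real helper in `sklearn.utils._array_api` additionally handles NaN/infinity edge cases and older NumPy versions):

```python
def logsumexp_sketch(a, axis, xp):
    # Shift by the max along `axis` so that exp() cannot overflow.
    amax = xp.max(a, axis=axis, keepdims=True)
    out = xp.log(xp.sum(xp.exp(a - amax), axis=axis))
    return out + xp.squeeze(amax, axis=axis)
```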

def score(self, X, y=None):
"""Compute the per-sample average log-likelihood of the given data X.
@@ -370,7 +388,8 @@
log_likelihood : float
Log-likelihood of `X` under the Gaussian mixture model.
"""
return self.score_samples(X).mean()
xp, _ = get_namespace(X)
return float(xp.mean(self.score_samples(X)))

def predict(self, X):
"""Predict the labels for the data samples in X using trained model.
@@ -387,8 +406,9 @@
Component labels.
"""
check_is_fitted(self)
xp, _ = get_namespace(X)
X = validate_data(self, X, reset=False)
return self._estimate_weighted_log_prob(X).argmax(axis=1)
return xp.argmax(self._estimate_weighted_log_prob(X), axis=1)
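
Note the systematic change from array methods to namespace functions here and elsewhere in the diff (`arr.argmax(axis=1)` becomes `xp.argmax(arr, axis=1)`): the array API standard guarantees reductions as namespace functions only, not as methods on array objects. For example, assuming the `array-api-strict` reference package is installed:

```python
import array_api_strict as xp

arr = xp.asarray([[0.1, 0.9], [0.8, 0.2]])
labels = xp.argmax(arr, axis=1)  # portable
row_sums = xp.sum(arr, axis=1)   # portable replacement for arr.sum(axis=1)
```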

def predict_proba(self, X):
"""Evaluate the components' density for each sample.
@@ -406,8 +426,9 @@
"""
check_is_fitted(self)
X = validate_data(self, X, reset=False)
_, log_resp = self._estimate_log_prob_resp(X)
return np.exp(log_resp)
xp, _ = get_namespace(X)
_, log_resp = self._estimate_log_prob_resp(X, xp=xp)
return xp.exp(log_resp)

def sample(self, n_samples=1):
"""Generate random samples from the fitted Gaussian distribution.
@@ -426,6 +447,7 @@
Component labels.
"""
check_is_fitted(self)
xp, _, device_ = get_namespace_and_device(self.means_)

if n_samples < 1:
raise ValueError(
@@ -435,22 +457,30 @@

_, n_features = self.means_.shape
rng = check_random_state(self.random_state)
n_samples_comp = rng.multinomial(n_samples, self.weights_)
n_samples_comp = rng.multinomial(
n_samples, _convert_to_numpy(self.weights_, xp)
)

if self.covariance_type == "full":
X = np.vstack(
[
rng.multivariate_normal(mean, covariance, int(sample))
for (mean, covariance, sample) in zip(
self.means_, self.covariances_, n_samples_comp
_convert_to_numpy(self.means_, xp),
_convert_to_numpy(self.covariances_, xp),
n_samples_comp,
)
]
)
elif self.covariance_type == "tied":
X = np.vstack(
[
rng.multivariate_normal(mean, self.covariances_, int(sample))
for (mean, sample) in zip(self.means_, n_samples_comp)
rng.multivariate_normal(
mean, _convert_to_numpy(self.covariances_, xp), int(sample)
)
for (mean, sample) in zip(
_convert_to_numpy(self.means_, xp), n_samples_comp
)
]
)
else:
Expand All @@ -460,18 +490,23 @@ def sample(self, n_samples=1):
+ rng.standard_normal(size=(sample, n_features))
* np.sqrt(covariance)
for (mean, covariance, sample) in zip(
self.means_, self.covariances_, n_samples_comp
_convert_to_numpy(self.means_, xp),
_convert_to_numpy(self.covariances_, xp),
n_samples_comp,
)
]
)

y = np.concatenate(
[np.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)]
y = xp.concat(
[
xp.full(int(n_samples_comp[i]), i, dtype=xp.int64, device=device_)
for i in range(len(n_samples_comp))
]
)

return (X, y)
return xp.asarray(X, device=device_), y
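
`sample` deliberately round-trips through NumPy: `numpy.random.RandomState` has no array API counterpart, so the fitted parameters are converted with `_convert_to_numpy`, the sampling happens in NumPy, and the result is moved back to the original namespace and device. A condensed sketch of that pattern, with a hypothetical helper name but the same utilities the diff uses:

```python
import numpy as np

from sklearn.utils._array_api import _convert_to_numpy, get_namespace_and_device


def gaussian_noise_like(means, rng, n_samples):
    xp, _, device = get_namespace_and_device(means)
    means_np = _convert_to_numpy(means, xp)  # the sampling itself stays in NumPy
    X = means_np[0] + rng.standard_normal(size=(n_samples, means_np.shape[1]))
    return xp.asarray(X, device=device)  # back to the input namespace and device
```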

def _estimate_weighted_log_prob(self, X):
def _estimate_weighted_log_prob(self, X, xp=None):
"""Estimate the weighted log-probabilities, log P(X | Z) + log weights.

Parameters
@@ -482,10 +517,10 @@ def _estimate_weighted_log_prob(self, X):
-------
weighted_log_prob : array, shape (n_samples, n_component)
"""
return self._estimate_log_prob(X) + self._estimate_log_weights()
return self._estimate_log_prob(X, xp=xp) + self._estimate_log_weights(xp=xp)

@abstractmethod
def _estimate_log_weights(self):
def _estimate_log_weights(self, xp=None):
"""Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.

Returns
@@ -495,7 +530,7 @@ def _estimate_log_weights(self):
pass

@abstractmethod
def _estimate_log_prob(self, X):
def _estimate_log_prob(self, X, xp=None):
"""Estimate the log-probabilities log P(X | Z).

Compute the log-probabilities per each component for each sample.
@@ -510,7 +545,7 @@
"""
pass

def _estimate_log_prob_resp(self, X):
def _estimate_log_prob_resp(self, X, xp=None):
"""Estimate log probabilities and responsibilities for each sample.

Compute the log probabilities, weighted log probabilities per
@@ -529,11 +564,17 @@
log_responsibilities : array, shape (n_samples, n_components)
logarithm of the responsibilities
"""
weighted_log_prob = self._estimate_weighted_log_prob(X)
log_prob_norm = logsumexp(weighted_log_prob, axis=1)
with np.errstate(under="ignore"):
xp, _ = get_namespace(X, xp=xp)
weighted_log_prob = self._estimate_weighted_log_prob(X, xp=xp)
log_prob_norm = _logsumexp(weighted_log_prob, axis=1, xp=xp)

# There is no errstate equivalent for warning/error management in array API
context_manager = (
np.errstate(under="ignore") if _is_numpy_namespace(xp) else nullcontext()
)
with context_manager:
# ignore underflow
log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]
log_resp = weighted_log_prob - log_prob_norm[:, xp.newaxis]
return log_prob_norm, log_resp
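
The `nullcontext` switch above is needed because `np.errstate` is NumPy specific and, as the inline comment says, the array API standard offers no equivalent for floating point warning management. A self-contained sketch of the same pattern, with a hypothetical function name:

```python
from contextlib import nullcontext

import numpy as np

from sklearn.utils._array_api import _is_numpy_namespace, get_namespace


def subtract_ignoring_underflow(a, b):
    xp, _ = get_namespace(a, b)
    # Silence underflow where the concept exists (NumPy); no-op elsewhere.
    ctx = np.errstate(under="ignore") if _is_numpy_namespace(xp) else nullcontext()
    with ctx:
        return a - b
```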

def _print_verbose_msg_init_beg(self, n_init):