From aa4b63c3c67110f3c8cd125db2589305b29f1fad Mon Sep 17 00:00:00 2001
From: Chris <cboseak@gmail.com>
Date: Wed, 4 Jun 2025 15:48:10 -0500
Subject: [PATCH 1/4] [31462] DummyClassifier strategy that produces randomized
 probabilities

---
 .../sklearn.dummy/31462.feature.rst           |  5 ++
 sklearn/dummy.py                              | 35 +++++++++---
 sklearn/tests/test_dummy.py                   | 55 +++++++++++++++++++
 3 files changed, 88 insertions(+), 7 deletions(-)
 create mode 100644 doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst
diff --git a/doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst b/doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst
new file mode 100644
index 0000000000000..ce9933d803a47
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst
@@ -0,0 +1,5 @@
+- :class:`dummy.DummyClassifier` now supports a new strategy "uniform-proba" that
+  generates random probability distributions for each sample using a Dirichlet
+  distribution with all concentration parameters set to 1. This results in uniformly
+  distributed probability vectors that sum to 1 for each sample.
+  By :user:`Chris Boseakc <cboseak>`
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index 7d44fa2e473bb..b4c4ec36ccc76 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -57,7 +57,7 @@ class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):
     Parameters
     ----------
     strategy : {"most_frequent", "prior", "stratified", "uniform", \
-            "constant"}, default="prior"
+            "uniform-proba", "constant"}, default="prior"
         Strategy to use to generate predictions.
 
         * "most_frequent": the `predict` method always returns the most
@@ -79,6 +79,10 @@ class prior probabilities.
         * "uniform": generates predictions uniformly at random from the list
           of unique classes observed in `y`, i.e. each class has equal
           probability.
+        * "uniform-proba": `predict_proba` draws one probability vector per sample
+          from a Dirichlet distribution with all concentration parameters set to
+          1, i.e. uniformly over the probability simplex (each row sums to 1).
+          `predict` returns the class with the highest sampled probability.
         * "constant": always predicts a constant label that is provided by
           the user. This is useful for metrics that evaluate a non-majority
           class.
@@ -89,7 +93,8 @@ class prior probabilities.
 
     random_state : int, RandomState instance or None, default=None
         Controls the randomness to generate the predictions when
-        ``strategy='stratified'`` or ``strategy='uniform'``.
+        ``strategy='stratified'``, ``strategy='uniform'``, or
+        ``strategy='uniform-proba'``.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
 
@@ -147,7 +152,16 @@ class prior probabilities.
 
     _parameter_constraints: dict = {
         "strategy": [
-            StrOptions({"most_frequent", "prior", "stratified", "uniform", "constant"})
+            StrOptions(
+                {
+                    "most_frequent",
+                    "prior",
+                    "stratified",
+                    "uniform",
+                    "uniform-proba",
+                    "constant",
+                }
+            )
         ],
         "random_state": ["random_state"],
         "constant": [Integral, str, "array-like", None],
@@ -280,7 +294,7 @@ def predict(self, X):
             class_prior_ = [class_prior_]
             constant = [constant]
         # Compute probability only once
-        if self._strategy == "stratified":
+        if self._strategy in ("stratified", "uniform-proba"):
             proba = self.predict_proba(X)
             if self.n_outputs_ == 1:
                 proba = [proba]
@@ -293,10 +307,10 @@ def predict(self, X):
             elif self._strategy == "stratified":
                 class_prob = class_prior_
 
-            elif self._strategy == "uniform":
+            elif self._strategy in ("uniform", "uniform-proba"):
                 raise ValueError(
                     "Sparse target prediction is not "
-                    "supported with the uniform strategy"
+                    f"supported with the {self._strategy} strategy"
                 )
 
             elif self._strategy == "constant":
@@ -313,7 +327,7 @@ def predict(self, X):
                     [n_samples, 1],
                 )
 
-            elif self._strategy == "stratified":
+            elif self._strategy in ("stratified", "uniform-proba"):
                 y = np.vstack(
                     [
                         classes_[k][proba[k].argmax(axis=1)]
@@ -387,6 +401,13 @@ def predict_proba(self, X):
                 out = np.ones((n_samples, n_classes_[k]), dtype=np.float64)
                 out /= n_classes_[k]
 
+            elif self._strategy == "uniform-proba":
+                # Generate random probability vectors from Dirichlet distribution
+                # with all concentration parameters set to 1 (uniform)
+                alpha = np.ones(n_classes_[k])
+                out = rs.dirichlet(alpha, size=n_samples)
+                out = out.astype(np.float64)
+
             elif self._strategy == "constant":
                 ind = np.where(classes_[k] == constant[k])
                 out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)
diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
index 61f1803b7a24f..8a4a0d901c088 100644
--- a/sklearn/tests/test_dummy.py
+++ b/sklearn/tests/test_dummy.py
@@ -713,3 +713,58 @@ def test_dtype_of_classifier_probas(strategy):
     probas = model.fit(X, y).predict_proba(X)
 
     assert probas.dtype == np.float64
+
+
+def test_uniform_proba_strategy(global_random_seed):
+    X = [[0]] * 5  # ignored
+    y = [1, 2, 1, 1, 2]
+    clf = DummyClassifier(strategy="uniform-proba", random_state=global_random_seed)
+    clf.fit(X, y)
+
+    X_test = [[0]] * 100
+    y_pred_proba = clf.predict_proba(X_test)
+
+    # Check that probabilities sum to 1 for each sample
+    assert_array_almost_equal(np.sum(y_pred_proba, axis=1), np.ones(len(X_test)))
+
+    # Check that all probabilities are >= 0
+    assert np.all(y_pred_proba >= 0)
+
+    # Check shape
+    assert y_pred_proba.shape == (len(X_test), len(np.unique(y)))
+
+    # Check that predict returns the class with highest probability
+    y_pred = clf.predict(X_test)
+    for i in range(len(X_test)):
+        assert y_pred[i] == clf.classes_[np.argmax(y_pred_proba[i])]
+
+    _check_predict_proba(clf, X_test, y)
+
+
+def test_uniform_proba_strategy_multioutput(global_random_seed):
+    X = [[0]] * 5  # ignored
+    y = np.array([[2, 1], [2, 2], [1, 1], [1, 2], [1, 1]])
+
+    clf = DummyClassifier(strategy="uniform-proba", random_state=global_random_seed)
+    clf.fit(X, y)
+
+    X_test = [[0]] * 100
+    y_pred = clf.predict(X_test)
+    y_pred_proba = clf.predict_proba(X_test)
+
+    # For multioutput, predict_proba returns a list of arrays
+    assert isinstance(y_pred_proba, list)
+    assert len(y_pred_proba) == y.shape[1]
+
+    for k in range(y.shape[1]):
+        # Check that probabilities sum to 1 for each sample
+        assert_array_almost_equal(np.sum(y_pred_proba[k], axis=1), np.ones(len(X_test)))
+
+        # Check that all probabilities are >= 0
+        assert np.all(y_pred_proba[k] >= 0)
+
+        # Check shape
+        assert y_pred_proba[k].shape == (len(X_test), len(np.unique(y[:, k])))
+
+    _check_predict_proba(clf, X_test, y)
+    _check_behavior_2d(clf)

From 8a277496a42335a84df64fe349a00bc1b86c25be Mon Sep 17 00:00:00 2001
From: Chris <cboseak@gmail.com>
Date: Wed, 4 Jun 2025 15:48:10 -0500
Subject: [PATCH 2/4] [31462] DummyClassifier strategy that produces randomized
 probabilities

---
 .../upcoming_changes/sklearn.dummy/31488.feature.rst         | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 doc/whats_new/upcoming_changes/sklearn.dummy/31488.feature.rst

diff --git a/doc/whats_new/upcoming_changes/sklearn.dummy/31488.feature.rst b/doc/whats_new/upcoming_changes/sklearn.dummy/31488.feature.rst
new file mode 100644
index 0000000000000..ce9933d803a47
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/sklearn.dummy/31488.feature.rst
@@ -0,0 +1,5 @@
+- :class:`dummy.DummyClassifier` now supports a new strategy "uniform-proba" that
+  generates random probability distributions for each sample using a Dirichlet
+  distribution with all concentration parameters set to 1. This results in uniformly
+  distributed probability vectors that sum to 1 for each sample.
+  By :user:`Chris Boseak <cboseak>`

From 7167d18ccc3cf97349d6930ac69f48bb7d99c4dd Mon Sep 17 00:00:00 2001
From: Chris <cboseak@gmail.com>
Date: Thu, 5 Jun 2025 09:10:35 -0500
Subject: [PATCH 3/4] changelog

---
 .../upcoming_changes/sklearn.dummy/31462.feature.rst         | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst

diff --git a/doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst b/doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst
deleted file mode 100644
index ce9933d803a47..0000000000000
--- a/doc/whats_new/upcoming_changes/sklearn.dummy/31462.feature.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-- :class:`dummy.DummyClassifier` now supports a new strategy "uniform-proba" that
-  generates random probability distributions for each sample using a Dirichlet
-  distribution with all concentration parameters set to 1. This results in uniformly
-  distributed probability vectors that sum to 1 for each sample.
-  By :user:`Chris Boseakc <cboseak>`

From e39ae04e09ede8b2f03fa48a6b8e53643fbc8598 Mon Sep 17 00:00:00 2001
From: Christopher Boseak <cboseak@Gmail.com>
Date: Tue, 10 Jun 2025 16:12:38 -0500
Subject: [PATCH 4/4] Update sklearn/tests/test_dummy.py based on suggestion

Co-authored-by: Tom McClintock <thmsmcclintock@gmail.com>
---
 sklearn/tests/test_dummy.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
index 8a4a0d901c088..b3c2801638133 100644
--- a/sklearn/tests/test_dummy.py
+++ b/sklearn/tests/test_dummy.py
@@ -715,7 +715,8 @@ def test_dtype_of_classifier_probas(strategy):
     assert probas.dtype == np.float64
 
 
-def test_uniform_proba_strategy(global_random_seed):
+def test_uniform_proba_strategy(global_random_seed) -> None:
+    """Basic checks on uniform probability distributions in the dummy classifier."""
     X = [[0]] * 5  # ignored
     y = [1, 2, 1, 1, 2]
     clf = DummyClassifier(strategy="uniform-proba", random_state=global_random_seed)