FIX `param_distribution` param of `HalvingRandomSearchCV` accepts list of dicts by StefanieSenger · Pull Request #26893 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

FIX param_distribution param of HalvingRandomSearchCV accepts list of dicts #26893

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 7, 2023
4 changes: 4 additions & 0 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ Changelog
:attr:`sklearn.neighbors.KDTree.valid_metrics` as public class attributes.
:pr:`26754` by :user:`Julien Jerphanion <jjerphan>`.

- |Fix| :class:`sklearn.model_selection.HalvingRandomSearchCV` no longer raises
when the input to the `param_distributions` parameter is a list of dicts.
:pr:`26893` by :user:`Stefanie Senger <StefanieSenger>`.

:mod:`sklearn.preprocessing`
............................

Expand Down
8 changes: 5 additions & 3 deletions sklearn/model_selection/_search_successive_halving.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,11 +750,13 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving):
Either estimator needs to provide a ``score`` function,
or ``scoring`` must be passed.

param_distributions : dict
Dictionary with parameters names (string) as keys and distributions
param_distributions : dict or list of dicts
Dictionary with parameters names (`str`) as keys and distributions
or lists of parameters to try. Distributions must provide a ``rvs``
method for sampling (such as those from scipy.stats.distributions).
If a list is given, it is sampled uniformly.
If a list of dicts is given, first a dict is sampled uniformly, and
then a parameter is sampled using that dict as above.

n_candidates : "exhaust" or int, default="exhaust"
The number of candidate parameters to sample, at the first
Expand Down Expand Up @@ -1024,7 +1026,7 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving):

_parameter_constraints: dict = {
**BaseSuccessiveHalving._parameter_constraints,
"param_distributions": [dict],
"param_distributions": [dict, list],
"n_candidates": [
Interval(Integral, 0, None, closed="neither"),
StrOptions({"exhaust"}),
Expand Down
39 changes: 19 additions & 20 deletions sklearn/model_selection/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,18 +900,16 @@ def check_cv_results_array_types(search, param_keys, score_keys):
assert cv_results["rank_test_%s" % key].dtype == np.int32


def check_cv_results_keys(cv_results, param_keys, score_keys, n_cand):
def check_cv_results_keys(cv_results, param_keys, score_keys, n_cand, extra_keys=()):
# Test the search.cv_results_ contains all the required results
assert_array_equal(
sorted(cv_results.keys()), sorted(param_keys + score_keys + ("params",))
)
all_keys = param_keys + score_keys + extra_keys
assert_array_equal(sorted(cv_results.keys()), sorted(all_keys + ("params",)))
assert all(cv_results[key].shape == (n_cand,) for key in param_keys + score_keys)


def test_grid_search_cv_results():
X, y = make_classification(n_samples=50, n_features=4, random_state=42)

n_splits = 3
n_grid_points = 6
params = [
dict(
Expand Down Expand Up @@ -949,9 +947,7 @@ def test_grid_search_cv_results():
)
n_candidates = n_grid_points

search = GridSearchCV(
SVC(), cv=n_splits, param_grid=params, return_train_score=True
)
search = GridSearchCV(SVC(), cv=3, param_grid=params, return_train_score=True)
search.fit(X, y)
cv_results = search.cv_results_
# Check if score and timing are reasonable
Expand All @@ -967,31 +963,35 @@ def test_grid_search_cv_results():
check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates)
# Check masking
cv_results = search.cv_results_
n_candidates = len(search.cv_results_["params"])
assert all(

poly_results = [
(
cv_results["param_C"].mask[i]
and cv_results["param_gamma"].mask[i]
and not cv_results["param_degree"].mask[i]
)
for i in range(n_candidates)
if cv_results["param_kernel"][i] == "linear"
)
assert all(
if cv_results["param_kernel"][i] == "poly"
]
assert all(poly_results)
assert len(poly_results) == 2

rbf_results = [
(
not cv_results["param_C"].mask[i]
and not cv_results["param_gamma"].mask[i]
and cv_results["param_degree"].mask[i]
)
for i in range(n_candidates)
if cv_results["param_kernel"][i] == "rbf"
)
]
assert all(rbf_results)
assert len(rbf_results) == 4


def test_random_search_cv_results():
X, y = make_classification(n_samples=50, n_features=4, random_state=42)

n_splits = 3
n_search_iter = 30

params = [
Expand All @@ -1016,29 +1016,28 @@ def test_random_search_cv_results():
"mean_score_time",
"std_score_time",
)
n_cand = n_search_iter
n_candidates = n_search_iter

search = RandomizedSearchCV(
SVC(),
n_iter=n_search_iter,
cv=n_splits,
cv=3,
param_distributions=params,
return_train_score=True,
)
search.fit(X, y)
cv_results = search.cv_results_
# Check results structure
check_cv_results_array_types(search, param_keys, score_keys)
check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
n_candidates = len(search.cv_results_["params"])
check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates)
assert all(
(
cv_results["param_C"].mask[i]
and cv_results["param_gamma"].mask[i]
and not cv_results["param_degree"].mask[i]
)
for i in range(n_candidates)
if cv_results["param_kernel"][i] == "linear"
if cv_results["param_kernel"][i] == "poly"
)
assert all(
(
Expand Down
73 changes: 71 additions & 2 deletions sklearn/model_selection/tests/test_successive_halving.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np
import pytest
from scipy.stats import norm, randint
from scipy.stats import expon, norm, randint

from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
Expand All @@ -23,7 +23,11 @@
_SubsampleMetaSplitter,
_top_k,
)
from sklearn.svm import LinearSVC
from sklearn.model_selection.tests.test_search import (
check_cv_results_array_types,
check_cv_results_keys,
)
from sklearn.svm import SVC, LinearSVC


class FastClassifier(DummyClassifier):
Expand Down Expand Up @@ -777,3 +781,68 @@ def test_select_best_index(SearchCV):
# we expect the index of 'i'
best_index = SearchCV._select_best_index(None, None, results)
assert best_index == 8


def test_halving_random_search_list_of_dicts():
    """Check the behaviour of `HalvingRandomSearchCV` when `param_distributions`
    is a list of dictionaries.
    """
    X, y = make_classification(n_samples=150, n_features=4, random_state=42)

    # Two sub-distributions: a continuous one (rbf kernel) and a discrete
    # one (poly kernel). A candidate is sampled from exactly one of them.
    distributions = [
        {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)},
        {"kernel": ["poly"], "degree": [2, 3]},
    ]
    param_keys = (
        "param_C",
        "param_degree",
        "param_gamma",
        "param_kernel",
    )
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    # Keys added by successive-halving on top of the regular search results.
    extra_keys = ("n_resources", "iter")

    halving_search = HalvingRandomSearchCV(
        SVC(),
        cv=3,
        param_distributions=distributions,
        return_train_score=True,
        random_state=0,
    )
    halving_search.fit(X, y)

    total_candidates = sum(halving_search.n_candidates_)
    results = halving_search.cv_results_

    # Check that cv_results_ has the expected structure and dtypes.
    check_cv_results_keys(results, param_keys, score_keys, total_candidates, extra_keys)
    check_cv_results_array_types(halving_search, param_keys, score_keys)

    # Parameters belonging to the dict a candidate was NOT sampled from
    # must be masked; parameters from its own dict must not be.
    for i in range(total_candidates):
        kernel = results["param_kernel"][i]
        if kernel == "poly":
            assert results["param_C"].mask[i]
            assert results["param_gamma"].mask[i]
            assert not results["param_degree"].mask[i]
        elif kernel == "rbf":
            assert not results["param_C"].mask[i]
            assert not results["param_gamma"].mask[i]
            assert results["param_degree"].mask[i]