lesteve
diff --git a/sklearn/base.py b/sklearn/base.py
Lines changed: 16 additions & 1 deletion
diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py
Lines changed: 13 additions & 23 deletions
diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py
Lines changed: 57 additions & 55 deletions
diff --git a/sklearn/cluster/tests/test_bisect_k_means.py b/sklearn/cluster/tests/test_bisect_k_means.py
Lines changed: 0 additions & 25 deletions
@@ -25,8 +25,9 @@
 from .utils.validation import _check_feature_names_in
 from .utils.validation import _generate_get_feature_names_out
 from .utils.validation import check_is_fitted
-from .utils._estimator_html_repr import estimator_html_repr
 from .utils.validation import _get_feature_names
+from .utils._estimator_html_repr import estimator_html_repr
+from .utils._param_validation import validate_parameter_constraints
 
 
 def clone(estimator, *, safe=True):
@@ -601,6 +602,20 @@ def _validate_data(
 
         return out
 
+    def _validate_params(self):
+        """Validate types and values of constructor parameters.
+
+        The expected type and values must be defined in the `_parameter_constraints`
+        class attribute, which is a dictionary `param_name: list of constraints`. See
+        the docstring of `validate_parameter_constraints` for a description of the
+        accepted constraints.
+        """
+        validate_parameter_constraints(
+            self._parameter_constraints,
+            self.get_params(deep=False),
+            caller_name=self.__class__.__name__,
+        )
+
     @property
     def _repr_html_(self):
         """HTML representation of estimator.
 
@@ -17,7 +17,7 @@
 from ..utils.validation import check_is_fitted
 from ..utils.validation import _check_sample_weight
 from ..utils.validation import check_random_state
-from ..utils.validation import _is_arraylike_not_scalar
+from ..utils._param_validation import StrOptions
 
 
 class _BisectingTree:
@@ -204,6 +204,14 @@ class BisectingKMeans(_BaseKMeans):
            [ 1., 2.]])
     """
 
+    _parameter_constraints = {
+        **_BaseKMeans._parameter_constraints,
+        "init": [StrOptions({"k-means++", "random"}), callable],
+        "copy_x": [bool],
+        "algorithm": [StrOptions({"lloyd", "elkan"})],
+        "bisecting_strategy": [StrOptions({"biggest_inertia", "largest_cluster"})],
+    }
+
     def __init__(
         self,
         n_clusters=8,
@@ -233,27 +241,6 @@ def __init__(
         self.algorithm = algorithm
         self.bisecting_strategy = bisecting_strategy
 
-    def _check_params(self, X):
-        super()._check_params(X)
-
-        # algorithm
-        if self.algorithm not in ("lloyd", "elkan"):
-            raise ValueError(
-                "Algorithm must be either 'lloyd' or 'elkan', "
-                f"got {self.algorithm} instead."
-            )
-
-        # bisecting_strategy
-        if self.bisecting_strategy not in ["biggest_inertia", "largest_cluster"]:
-            raise ValueError(
-                "Bisect Strategy must be 'biggest_inertia' or 'largest_cluster'. "
-                f"Got {self.bisecting_strategy} instead."
-            )
-
-        # init
-        if _is_arraylike_not_scalar(self.init):
-            raise ValueError("BisectingKMeans does not support init as array.")
-
     def _warn_mkl_vcomp(self, n_active_threads):
         """Warn when vcomp and mkl are both present"""
         warnings.warn(
@@ -380,6 +367,8 @@ def fit(self, X, y=None, sample_weight=None):
         self
             Fitted estimator.
         """
+        self._validate_params()
+
         X = self._validate_data(
             X,
             accept_sparse="csr",
@@ -389,7 +378,8 @@ def fit(self, X, y=None, sample_weight=None):
             accept_large_sparse=False,
         )
 
-        self._check_params(X)
+        self._check_params_vs_input(X)
+
         self._random_state = check_random_state(self.random_state)
         sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
         self._n_threads = _openmp_effective_n_threads()
 
@@ -12,6 +12,7 @@
 # License: BSD 3 clause
 
 from abc import ABC, abstractmethod
+from numbers import Integral, Real
 import warnings
 
 import numpy as np
@@ -34,6 +35,9 @@
 from ..utils import check_random_state
 from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..utils.validation import _is_arraylike_not_scalar
+from ..utils._param_validation import Interval
+from ..utils._param_validation import StrOptions
+from ..utils._param_validation import validate_params
 from ..utils._openmp_helpers import _openmp_effective_n_threads
 from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
 from ..exceptions import ConvergenceWarning
@@ -55,6 +59,15 @@
 # Initialization heuristic
 
 
+@validate_params(
+    {
+        "X": ["array-like", "sparse matrix"],
+        "n_clusters": [Interval(Integral, 1, None, closed="left")],
+        "x_squared_norms": ["array-like", None],
+        "random_state": ["random_state"],
+        "n_local_trials": [Interval(Integral, 1, None, closed="left"), None],
+    }
+)
 def kmeans_plusplus(
     X, n_clusters, *, x_squared_norms=None, random_state=None, n_local_trials=None
 ):
@@ -114,7 +127,6 @@ def kmeans_plusplus(
     >>> indices
     array([4, 2])
     """
-
     # Check data
     check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
 
@@ -135,12 +147,6 @@ def kmeans_plusplus(
             f"be equal to the length of n_samples {X.shape[0]}."
         )
 
-    if n_local_trials is not None and n_local_trials < 1:
-        raise ValueError(
-            f"n_local_trials is set to {n_local_trials} but should be an "
-            "integer value greater than zero."
-        )
-
     random_state = check_random_state(random_state)
 
     # Call private k-means++
@@ -794,6 +800,16 @@ class _BaseKMeans(
 ):
     """Base class for KMeans and MiniBatchKMeans"""
 
+    _parameter_constraints = {
+        "n_clusters": [Interval(Integral, 1, None, closed="left")],
+        "init": [StrOptions({"k-means++", "random"}), callable, "array-like"],
+        "n_init": [Interval(Integral, 1, None, closed="left")],
+        "max_iter": [Interval(Integral, 1, None, closed="left")],
+        "tol": [Interval(Real, 0, None, closed="left")],
+        "verbose": [Interval(Integral, 0, None, closed="left"), bool],
+        "random_state": ["random_state"],
+    }
+
     def __init__(
         self,
         n_clusters,
@@ -813,16 +829,7 @@ def __init__(
         self.verbose = verbose
         self.random_state = random_state
 
-    def _check_params(self, X):
-        # n_init
-        if self.n_init <= 0:
-            raise ValueError(f"n_init should be > 0, got {self.n_init} instead.")
-        self._n_init = self.n_init
-
-        # max_iter
-        if self.max_iter <= 0:
-            raise ValueError(f"max_iter should be > 0, got {self.max_iter} instead.")
-
+    def _check_params_vs_input(self, X):
         # n_clusters
         if X.shape[0] < self.n_clusters:
             raise ValueError(
@@ -833,16 +840,7 @@ def _check_params(self, X):
         self._tol = _tolerance(X, self.tol)
 
         # init
-        if not (
-            _is_arraylike_not_scalar(self.init)
-            or callable(self.init)
-            or (isinstance(self.init, str) and self.init in ["k-means++", "random"])
-        ):
-            raise ValueError(
-                "init should be either 'k-means++', 'random', an array-like or a "
-                f"callable, got '{self.init}' instead."
-            )
-
+        self._n_init = self.n_init
         if _is_arraylike_not_scalar(self.init) and self._n_init != 1:
             warnings.warn(
                 "Explicit initial center position passed: performing only"
@@ -1275,6 +1273,14 @@ class KMeans(_BaseKMeans):
            [ 1.,  2.]])
     """
 
+    _parameter_constraints = {
+        **_BaseKMeans._parameter_constraints,
+        "copy_x": [bool],
+        "algorithm": [
+            StrOptions({"lloyd", "elkan", "auto", "full"}, deprecated={"auto", "full"})
+        ],
+    }
+
     def __init__(
         self,
         n_clusters=8,
@@ -1301,15 +1307,8 @@ def __init__(
         self.copy_x = copy_x
         self.algorithm = algorithm
 
-    def _check_params(self, X):
-        super()._check_params(X)
-
-        # algorithm
-        if self.algorithm not in ("lloyd", "elkan", "auto", "full"):
-            raise ValueError(
-                "Algorithm must be either 'lloyd' or 'elkan', "
-                f"got {self.algorithm} instead."
-            )
+    def _check_params_vs_input(self, X):
+        super()._check_params_vs_input(X)
 
         self._algorithm = self.algorithm
         if self._algorithm in ("auto", "full"):
@@ -1362,6 +1361,8 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted estimator.
         """
+        self._validate_params()
+
         X = self._validate_data(
             X,
             accept_sparse="csr",
@@ -1371,7 +1372,8 @@ def fit(self, X, y=None, sample_weight=None):
             accept_large_sparse=False,
         )
 
-        self._check_params(X)
+        self._check_params_vs_input(X)
+
         random_state = check_random_state(self.random_state)
         sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
         self._n_threads = _openmp_effective_n_threads()
@@ -1755,6 +1757,15 @@ class MiniBatchKMeans(_BaseKMeans):
     array([0, 1], dtype=int32)
     """
 
+    _parameter_constraints = {
+        **_BaseKMeans._parameter_constraints,
+        "batch_size": [Interval(Integral, 1, None, closed="left")],
+        "compute_labels": [bool],
+        "max_no_improvement": [Interval(Integral, 0, None, closed="left"), None],
+        "init_size": [Interval(Integral, 1, None, closed="left"), None],
+        "reassignment_ratio": [Interval(Real, 0, None, closed="left")],
+    }
+
     def __init__(
         self,
         n_clusters=8,
@@ -1788,26 +1799,12 @@ def __init__(
         self.init_size = init_size
         self.reassignment_ratio = reassignment_ratio
 
-    def _check_params(self, X):
-        super()._check_params(X)
-
-        # max_no_improvement
-        if self.max_no_improvement is not None and self.max_no_improvement < 0:
-            raise ValueError(
-                "max_no_improvement should be >= 0, got "
-                f"{self.max_no_improvement} instead."
-            )
+    def _check_params_vs_input(self, X):
+        super()._check_params_vs_input(X)
 
-        # batch_size
-        if self.batch_size <= 0:
-            raise ValueError(
-                f"batch_size should be > 0, got {self.batch_size} instead."
-            )
         self._batch_size = min(self.batch_size, X.shape[0])
 
         # init_size
-        if self.init_size is not None and self.init_size <= 0:
-            raise ValueError(f"init_size should be > 0, got {self.init_size} instead.")
         self._init_size = self.init_size
         if self._init_size is None:
             self._init_size = 3 * self._batch_size
@@ -1949,6 +1946,8 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted estimator.
         """
+        self._validate_params()
+
         X = self._validate_data(
             X,
             accept_sparse="csr",
@@ -1957,7 +1956,7 @@ def fit(self, X, y=None, sample_weight=None):
             accept_large_sparse=False,
         )
 
-        self._check_params(X)
+        self._check_params_vs_input(X)
         random_state = check_random_state(self.random_state)
         sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
         self._n_threads = _openmp_effective_n_threads()
@@ -2106,6 +2105,9 @@ def partial_fit(self, X, y=None, sample_weight=None):
         """
         has_centers = hasattr(self, "cluster_centers_")
 
+        if not has_centers:
+            self._validate_params()
+
         X = self._validate_data(
             X,
             accept_sparse="csr",
@@ -2126,7 +2128,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
 
         if not has_centers:
             # this instance has not been fitted yet (fit or partial_fit)
-            self._check_params(X)
+            self._check_params_vs_input(X)
             self._n_threads = _openmp_effective_n_threads()
 
             # Validate init array
 
@@ -85,31 +85,6 @@ def test_one_cluster():
     assert_allclose(bisect_means.cluster_centers_, X.mean(axis=0).reshape(1, -1))
 
 
-@pytest.mark.parametrize(
-    "param, match",
-    [
-        # Test bisecting_strategy param
-        (
-            {"bisecting_strategy": "None"},
-            "Bisect Strategy must be 'biggest_inertia' or 'largest_cluster'",
-        ),
-        # Test init array
-        (
-            {"init": np.ones((5, 2))},
-            "BisectingKMeans does not support init as array.",
-        ),
-    ],
-)
-def test_wrong_params(param, match):
-    """Test Exceptions at check_params function."""
-    rng = np.random.RandomState(0)
-    X = rng.rand(5, 2)
-
-    with pytest.raises(ValueError, match=match):
-        bisect_means = BisectingKMeans(n_clusters=3, **param)
-        bisect_means.fit(X)
-
-
 @pytest.mark.parametrize("is_sparse", [True, False])
 def test_fit_predict(is_sparse):
     """Check if labels from fit(X) method are same as from fit(X).predict(X)."""