From 68132df7569e8b4589e048c60ee8824baecf7ac9 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Mon, 23 Aug 2021 18:23:03 +0530 Subject: [PATCH 01/17] use check_scalar to validate scalar inputs in DBSCAN algorithm --- sklearn/cluster/_dbscan.py | 37 ++++++++++++++++++++++++++-- sklearn/cluster/tests/test_dbscan.py | 35 +++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 35c17bbae061a..c6fb08ea7d7f2 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -10,9 +10,11 @@ # License: BSD 3 clause import numpy as np +import numbers import warnings from scipy import sparse +from ..utils import check_scalar from ..base import BaseEstimator, ClusterMixin from ..utils.validation import _check_sample_weight from ..neighbors import NearestNeighbors @@ -339,8 +341,8 @@ def fit(self, X, y=None, sample_weight=None): """ X = self._validate_data(X, accept_sparse="csr") - if not self.eps > 0.0: - raise ValueError("eps must be positive.") + # if not self.eps > 0.0: + # raise ValueError("eps must be positive.") if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) @@ -355,6 +357,37 @@ def fit(self, X, y=None, sample_weight=None): warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning) X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place + scalars_checks = { + "eps": { + "target_type": numbers.Real, + "min_val": 0.0, + "min_is_inclusive": False + }, + "min_samples": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": True + }, + "leaf_size": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": True + }, + "p": { + "target_type": numbers.Real, + "min_val": 1.0, + "min_is_inclusive": True + }, + "n_jobs": { + "target_type": numbers.Integral + } + } + + for scalar_name in scalars_checks: + check_scalar( + getattr(self, scalar_name), scalar_name, **scalars_checks[scalar_name] 
+ ) + neighbors_model = NearestNeighbors( radius=self.eps, algorithm=self.algorithm, diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 1c5ef8e58b2c5..ad47da233f3b2 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -25,6 +25,35 @@ X = generate_clustered_data(n_clusters=n_clusters) +@pytest.mark.parametrize( + "input, params, err_type, err_msg", + [ + (X, {"eps": -1.0}, ValueError, "eps == -1.0, must be a positive real number."), + (X, {"eps": 0.0}, ValueError, "eps == 0.0, must be a positive real number."), + + (X, {"min_samples": 0}, ValueError, + "min_samples == 0, must be a positive integer."), + (X, {"min_samples": 1.5}, ValueError, "min_samples == 1.5, must be an integer."), + (X, {"min_samples": -2}, ValueError, + "min_samples == -2, must be a positive integer."), + + (X, {"leaf_size": 0}, ValueError, "leaf_size == 0, must be a positive integer."), + (X, {"leaf_size": 2.5}, ValueError, "leaf_size == 1.5, must be an integer."), + (X, {"leaf_size": -3}, ValueError, + "leaf_size == -2, must be a positive integer."), + + (X, {"p": 0}, ValueError, "p == 0, must be >= 1"), + (X, {"p": -2}, ValueError, "p == -2, must be a positive real number."), + + (X, {"n_jobs": 2.5}, ValueError, "n_jobs == 2.5, must be an integer."), + ], +) +def test_dbscan_params_validation(input, params, err_type, err_msg): + """Check the parameters validation in `DBSCAN`.""" + with pytest.raises(err_type, match=err_msg): + dbscan(**params).fit(input) + + def test_dbscan_similarity(): # Tests the DBSCAN algorithm with a similarity array. # Parameters chosen specifically for this task. 
@@ -272,11 +301,11 @@ def test_input_validation(): @pytest.mark.parametrize( "args", [ - {"eps": -1.0}, + # {"eps": -1.0}, {"algorithm": "blah"}, {"metric": "blah"}, - {"leaf_size": -1}, - {"p": -1}, + # {"leaf_size": -1}, + # {"p": -1}, ], ) def test_dbscan_badargs(args): From 358adbe569a29d850dd8d140996c9ccf3754c6a0 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Mon, 23 Aug 2021 18:23:38 +0530 Subject: [PATCH 02/17] use check_scalar to validate scalar inputs in BIRCH algorithm --- sklearn/cluster/_birch.py | 29 ++++++++++++++++++++-- sklearn/cluster/tests/test_birch.py | 37 +++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 68a7a741a88aa..b2385a13b67fd 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -14,6 +14,7 @@ from ..base import TransformerMixin, ClusterMixin, BaseEstimator from ..utils.extmath import row_norms from ..utils import deprecated +from ..utils import check_scalar from ..utils.validation import check_is_fitted from ..exceptions import ConvergenceWarning from . import AgglomerativeClustering @@ -506,6 +507,30 @@ def fit(self, X, y=None): self Fitted estimator. 
""" + + scalars_checks = { + "threshold": { + "target_type": numbers.Real, + "min_val": 0.0, + "min_is_inclusive": False + }, + "branching_factor": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": False + }, + "n_clusters": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": True + } + } + + for scalar_name in scalars_checks: + check_scalar( + getattr(self, scalar_name), scalar_name, **scalars_checks[scalar_name] + ) + # TODO: Remove deprected flags in 1.2 self._deprecated_fit, self._deprecated_partial_fit = True, False return self._fit(X, partial=False) @@ -520,8 +545,8 @@ def _fit(self, X, partial): threshold = self.threshold branching_factor = self.branching_factor - if branching_factor <= 1: - raise ValueError("Branching_factor should be greater than one.") + # if branching_factor <= 1: + # raise ValueError("Branching_factor should be greater than one.") n_samples, n_features = X.shape # If partial_fit is called for the first time or fit is called, we diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index d32013e4e1314..9b0aea64e2e9d 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -19,6 +19,35 @@ from sklearn.utils._testing import assert_array_almost_equal +@pytest.mark.parametrize( + "input, params, err_type, err_msg", + [ + (X, {"threshold": -1.0}, ValueError, + "threshold == -1.0, must be a positive real number."), + (X, {"threshold": 0.0}, ValueError, + "threshold == 0.0, must be a positive real number."), + + (X, {"branching_factor": 0}, ValueError, + "branching_factor == 0, must be a positive integer greater than 1."), + (X, {"branching_factor": 1}, ValueError, + "branching_factor == 1, must be a positive integer greater than 1."), + (X, {"branching_factor": 1.5}, ValueError, + "min_samples == 1.5, must be an integer."), + (X, {"branching_factor": -2}, ValueError, + "branching_factor == -2, must be a positive integer."), + + 
(X, {"n_clusters": 0}, ValueError, "n_clusters == 0, must be a positive integer."), + (X, {"n_clusters": 2.5}, ValueError, "n_clusters == 2.5, must be an integer."), + (X, {"n_clusters": -3}, ValueError, + "n_clusters == -2, must be a positive integer."), + ], +) +def test_birch_params_validation(input, params, err_type, err_msg): + """Check the parameters validation in `Birch`.""" + with pytest.raises(err_type, match=err_msg): + Birch(**params).fit(input) + + def test_n_samples_leaves_roots(): # Sanity check for the number of samples in leaves and roots X, y = make_blobs(n_samples=10) @@ -141,10 +170,10 @@ def test_branching_factor(): brc.fit(X) check_branching_factor(brc.root_, branching_factor) - # Raises error when branching_factor is set to one. - brc = Birch(n_clusters=None, branching_factor=1, threshold=0.01) - with pytest.raises(ValueError): - brc.fit(X) + # # Raises error when branching_factor is set to one. + # brc = Birch(n_clusters=None, branching_factor=1, threshold=0.01) + # with pytest.raises(ValueError): + # brc.fit(X) def check_threshold(birch_instance, threshold): From d0781c27de0e0bc1fc7f9a62b5d1a22a2de61355 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Mon, 23 Aug 2021 18:29:15 +0530 Subject: [PATCH 03/17] use check_scalar to validate scalar inputs in DBSCAN algorithm --- sklearn/cluster/_dbscan.py | 37 ++++++++++++++++++++++++++-- sklearn/cluster/tests/test_dbscan.py | 35 +++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 35c17bbae061a..c6fb08ea7d7f2 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -10,9 +10,11 @@ # License: BSD 3 clause import numpy as np +import numbers import warnings from scipy import sparse +from ..utils import check_scalar from ..base import BaseEstimator, ClusterMixin from ..utils.validation import _check_sample_weight from ..neighbors import NearestNeighbors @@ -339,8 +341,8 @@ def 
fit(self, X, y=None, sample_weight=None): """ X = self._validate_data(X, accept_sparse="csr") - if not self.eps > 0.0: - raise ValueError("eps must be positive.") + # if not self.eps > 0.0: + # raise ValueError("eps must be positive.") if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) @@ -355,6 +357,37 @@ def fit(self, X, y=None, sample_weight=None): warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning) X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place + scalars_checks = { + "eps": { + "target_type": numbers.Real, + "min_val": 0.0, + "min_is_inclusive": False + }, + "min_samples": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": True + }, + "leaf_size": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": True + }, + "p": { + "target_type": numbers.Real, + "min_val": 1.0, + "min_is_inclusive": True + }, + "n_jobs": { + "target_type": numbers.Integral + } + } + + for scalar_name in scalars_checks: + check_scalar( + getattr(self, scalar_name), scalar_name, **scalars_checks[scalar_name] + ) + neighbors_model = NearestNeighbors( radius=self.eps, algorithm=self.algorithm, diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 1c5ef8e58b2c5..ad47da233f3b2 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -25,6 +25,35 @@ X = generate_clustered_data(n_clusters=n_clusters) +@pytest.mark.parametrize( + "input, params, err_type, err_msg", + [ + (X, {"eps": -1.0}, ValueError, "eps == -1.0, must be a positive real number."), + (X, {"eps": 0.0}, ValueError, "eps == 0.0, must be a positive real number."), + + (X, {"min_samples": 0}, ValueError, + "min_samples == 0, must be a positive integer."), + (X, {"min_samples": 1.5}, ValueError, "min_samples == 1.5, must be an integer."), + (X, {"min_samples": -2}, ValueError, + "min_samples == -2, must be a positive integer."), + + (X, {"leaf_size": 
0}, ValueError, "leaf_size == 0, must be a positive integer."), + (X, {"leaf_size": 2.5}, ValueError, "leaf_size == 1.5, must be an integer."), + (X, {"leaf_size": -3}, ValueError, + "leaf_size == -2, must be a positive integer."), + + (X, {"p": 0}, ValueError, "p == 0, must be >= 1"), + (X, {"p": -2}, ValueError, "p == -2, must be a positive real number."), + + (X, {"n_jobs": 2.5}, ValueError, "n_jobs == 2.5, must be an integer."), + ], +) +def test_dbscan_params_validation(input, params, err_type, err_msg): + """Check the parameters validation in `DBSCAN`.""" + with pytest.raises(err_type, match=err_msg): + dbscan(**params).fit(input) + + def test_dbscan_similarity(): # Tests the DBSCAN algorithm with a similarity array. # Parameters chosen specifically for this task. @@ -272,11 +301,11 @@ def test_input_validation(): @pytest.mark.parametrize( "args", [ - {"eps": -1.0}, + # {"eps": -1.0}, {"algorithm": "blah"}, {"metric": "blah"}, - {"leaf_size": -1}, - {"p": -1}, + # {"leaf_size": -1}, + # {"p": -1}, ], ) def test_dbscan_badargs(args): From 07b022f29a285ccf071fc29bffede9b1b3c16a80 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Mon, 23 Aug 2021 18:29:51 +0530 Subject: [PATCH 04/17] use check_scalar to validate scalar inputs in BIRCH algorithm --- sklearn/cluster/_birch.py | 29 ++++++++++++++++++++-- sklearn/cluster/tests/test_birch.py | 37 +++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 68a7a741a88aa..b2385a13b67fd 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -14,6 +14,7 @@ from ..base import TransformerMixin, ClusterMixin, BaseEstimator from ..utils.extmath import row_norms from ..utils import deprecated +from ..utils import check_scalar from ..utils.validation import check_is_fitted from ..exceptions import ConvergenceWarning from . 
import AgglomerativeClustering @@ -506,6 +507,30 @@ def fit(self, X, y=None): self Fitted estimator. """ + + scalars_checks = { + "threshold": { + "target_type": numbers.Real, + "min_val": 0.0, + "min_is_inclusive": False + }, + "branching_factor": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": False + }, + "n_clusters": { + "target_type": numbers.Integral, + "min_val": 1, + "min_is_inclusive": True + } + } + + for scalar_name in scalars_checks: + check_scalar( + getattr(self, scalar_name), scalar_name, **scalars_checks[scalar_name] + ) + # TODO: Remove deprected flags in 1.2 self._deprecated_fit, self._deprecated_partial_fit = True, False return self._fit(X, partial=False) @@ -520,8 +545,8 @@ def _fit(self, X, partial): threshold = self.threshold branching_factor = self.branching_factor - if branching_factor <= 1: - raise ValueError("Branching_factor should be greater than one.") + # if branching_factor <= 1: + # raise ValueError("Branching_factor should be greater than one.") n_samples, n_features = X.shape # If partial_fit is called for the first time or fit is called, we diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index d32013e4e1314..9b0aea64e2e9d 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -19,6 +19,35 @@ from sklearn.utils._testing import assert_array_almost_equal +@pytest.mark.parametrize( + "input, params, err_type, err_msg", + [ + (X, {"threshold": -1.0}, ValueError, + "threshold == -1.0, must be a positive real number."), + (X, {"threshold": 0.0}, ValueError, + "threshold == 0.0, must be a positive real number."), + + (X, {"branching_factor": 0}, ValueError, + "branching_factor == 0, must be a positive integer greater than 1."), + (X, {"branching_factor": 1}, ValueError, + "branching_factor == 1, must be a positive integer greater than 1."), + (X, {"branching_factor": 1.5}, ValueError, + "min_samples == 1.5, must be an integer."), + 
(X, {"branching_factor": -2}, ValueError, + "branching_factor == -2, must be a positive integer."), + + (X, {"n_clusters": 0}, ValueError, "n_clusters == 0, must be a positive integer."), + (X, {"n_clusters": 2.5}, ValueError, "n_clusters == 2.5, must be an integer."), + (X, {"n_clusters": -3}, ValueError, + "n_clusters == -2, must be a positive integer."), + ], +) +def test_birch_params_validation(input, params, err_type, err_msg): + """Check the parameters validation in `Birch`.""" + with pytest.raises(err_type, match=err_msg): + Birch(**params).fit(input) + + def test_n_samples_leaves_roots(): # Sanity check for the number of samples in leaves and roots X, y = make_blobs(n_samples=10) @@ -141,10 +170,10 @@ def test_branching_factor(): brc.fit(X) check_branching_factor(brc.root_, branching_factor) - # Raises error when branching_factor is set to one. - brc = Birch(n_clusters=None, branching_factor=1, threshold=0.01) - with pytest.raises(ValueError): - brc.fit(X) + # # Raises error when branching_factor is set to one. + # brc = Birch(n_clusters=None, branching_factor=1, threshold=0.01) + # with pytest.raises(ValueError): + # brc.fit(X) def check_threshold(birch_instance, threshold): From efb2d21a6cf463603cf0d5d3c98805be8274a0e8 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Fri, 3 Sep 2021 18:53:38 +0530 Subject: [PATCH 05/17] Removed for loops and called check_scalar separately --- sklearn/cluster/_birch.py | 45 ++++++++++++-------------- sklearn/cluster/_dbscan.py | 66 +++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 57 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index b2385a13b67fd..1cc067f80e436 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -508,28 +508,27 @@ def fit(self, X, y=None): Fitted estimator. 
""" - scalars_checks = { - "threshold": { - "target_type": numbers.Real, - "min_val": 0.0, - "min_is_inclusive": False - }, - "branching_factor": { - "target_type": numbers.Integral, - "min_val": 1, - "min_is_inclusive": False - }, - "n_clusters": { - "target_type": numbers.Integral, - "min_val": 1, - "min_is_inclusive": True - } - } - - for scalar_name in scalars_checks: - check_scalar( - getattr(self, scalar_name), scalar_name, **scalars_checks[scalar_name] - ) + check_scalar( + self.threshold, + "threshold", + target_type=numbers.Real, + min_val=0.0, + closed="neither" + ) + check_scalar( + self.branching_factor, + "branching_factor", + target_type=numbers.Integral, + min_val=1, + closed="neither" + ) + check_scalar( + self.n_clusters, + "n_clusters", + target_type=numbers.Integral, + min_val=1, + closed="left" + ) # TODO: Remove deprected flags in 1.2 self._deprecated_fit, self._deprecated_partial_fit = True, False @@ -545,8 +544,6 @@ def _fit(self, X, partial): threshold = self.threshold branching_factor = self.branching_factor - # if branching_factor <= 1: - # raise ValueError("Branching_factor should be greater than one.") n_samples, n_features = X.shape # If partial_fit is called for the first time or fit is called, we diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index c6fb08ea7d7f2..248ee4f8e57d1 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -341,9 +341,6 @@ def fit(self, X, y=None, sample_weight=None): """ X = self._validate_data(X, accept_sparse="csr") - # if not self.eps > 0.0: - # raise ValueError("eps must be positive.") - if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) @@ -357,36 +354,39 @@ def fit(self, X, y=None, sample_weight=None): warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning) X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place - scalars_checks = { - "eps": { - "target_type": numbers.Real, - "min_val": 0.0, - "min_is_inclusive": 
False - }, - "min_samples": { - "target_type": numbers.Integral, - "min_val": 1, - "min_is_inclusive": True - }, - "leaf_size": { - "target_type": numbers.Integral, - "min_val": 1, - "min_is_inclusive": True - }, - "p": { - "target_type": numbers.Real, - "min_val": 1.0, - "min_is_inclusive": True - }, - "n_jobs": { - "target_type": numbers.Integral - } - } - - for scalar_name in scalars_checks: - check_scalar( - getattr(self, scalar_name), scalar_name, **scalars_checks[scalar_name] - ) + check_scalar( + self.eps, + "eps", + target_type=numbers.Real, + min_val=0.0, + closed="neither" + ) + check_scalar( + self.min_samples, + "min_samples", + target_type=numbers.Integral, + min_val=1, + closed="left" + ) + check_scalar( + self.leaf_size, + "leaf_size", + target_type=numbers.Integral, + min_val=1, + closed="left" + ) + check_scalar( + self.p, + "p", + target_type=numbers.Real, + min_val=1.0, + closed="left" + ) + check_scalar( + self.n_jobs, + "n_jobs", + target_type=numbers.Integral + ) neighbors_model = NearestNeighbors( radius=self.eps, From 4e36c4b2e19ba1c0ccc8ef36f24df44442dc2653 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Fri, 3 Sep 2021 18:58:04 +0530 Subject: [PATCH 06/17] Removed redundant commented lines --- sklearn/cluster/tests/test_birch.py | 5 ----- sklearn/cluster/tests/test_dbscan.py | 3 --- 2 files changed, 8 deletions(-) diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index 9b0aea64e2e9d..fdc14e85605f7 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -170,11 +170,6 @@ def test_branching_factor(): brc.fit(X) check_branching_factor(brc.root_, branching_factor) - # # Raises error when branching_factor is set to one. 
- # brc = Birch(n_clusters=None, branching_factor=1, threshold=0.01) - # with pytest.raises(ValueError): - # brc.fit(X) - def check_threshold(birch_instance, threshold): """Use the leaf linked list for traversal""" diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index ad47da233f3b2..592745196d7cd 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -301,11 +301,8 @@ def test_input_validation(): @pytest.mark.parametrize( "args", [ - # {"eps": -1.0}, {"algorithm": "blah"}, {"metric": "blah"}, - # {"leaf_size": -1}, - # {"p": -1}, ], ) def test_dbscan_badargs(args): From 11a42ce92d440c79726b795cbb764fe7243f1f8e Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Mon, 20 Sep 2021 15:20:40 +0530 Subject: [PATCH 07/17] Made the suggested changes in DBSCAN & BIRCH algos --- sklearn/cluster/_birch.py | 27 ++++++----- sklearn/cluster/_dbscan.py | 28 +++++------ sklearn/cluster/tests/test_birch.py | 71 ++++++++++++++++------------ sklearn/cluster/tests/test_dbscan.py | 66 ++++++++++++++------------ 4 files changed, 105 insertions(+), 87 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index bfde52e7dbc5e..b418e0b5b790b 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -13,9 +13,8 @@ from ..metrics.pairwise import euclidean_distances from ..base import TransformerMixin, ClusterMixin, BaseEstimator from ..utils.extmath import row_norms -from ..utils import deprecated -from ..utils import check_scalar -from ..utils.validation import check_is_fitted +from ..utils import check_scalar, deprecated +from ..utils.validation import _num_samples, check_is_fitted from ..exceptions import ConvergenceWarning from . 
import AgglomerativeClustering from .._config import config_context @@ -519,22 +518,24 @@ def fit(self, X, y=None): "threshold", target_type=numbers.Real, min_val=0.0, - closed="neither" + include_boundaries="neither", ) check_scalar( self.branching_factor, "branching_factor", target_type=numbers.Integral, min_val=1, - closed="neither" - ) - check_scalar( - self.n_clusters, - "n_clusters", - target_type=numbers.Integral, - min_val=1, - closed="left" + include_boundaries="neither", ) + if isinstance(self.n_clusters, numbers.Number): + check_scalar( + self.n_clusters, + "n_clusters", + target_type=numbers.Integral, + min_val=1, + max_val=_num_samples(X), + include_boundaries="both", + ) # TODO: Remove deprected flags in 1.2 self._deprecated_fit, self._deprecated_partial_fit = True, False @@ -722,7 +723,7 @@ def _global_clustering(self, X=None): if len(centroids) < self.n_clusters: not_enough_centroids = True elif clusterer is not None and not hasattr(clusterer, "fit_predict"): - raise ValueError( + raise TypeError( "n_clusters should be an instance of ClusterMixin or an int" ) diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index f248d069f1f2a..3422a2074257e 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -365,34 +365,32 @@ def fit(self, X, y=None, sample_weight=None): "eps", target_type=numbers.Real, min_val=0.0, - closed="neither" + include_boundaries="neither", ) check_scalar( self.min_samples, "min_samples", target_type=numbers.Integral, min_val=1, - closed="left" + include_boundaries="left", ) check_scalar( self.leaf_size, "leaf_size", target_type=numbers.Integral, min_val=1, - closed="left" - ) - check_scalar( - self.p, - "p", - target_type=numbers.Real, - min_val=1.0, - closed="left" - ) - check_scalar( - self.n_jobs, - "n_jobs", - target_type=numbers.Integral + include_boundaries="left", ) + if self.p is not None: + check_scalar( + self.p, + "p", + target_type=numbers.Real, + min_val=1.0, + 
include_boundaries="left", + ) + if self.n_jobs is not None: + check_scalar(self.n_jobs, "n_jobs", target_type=numbers.Integral) neighbors_model = NearestNeighbors( radius=self.eps, diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index fdc14e85605f7..c2e9196af894b 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -19,35 +19,6 @@ from sklearn.utils._testing import assert_array_almost_equal -@pytest.mark.parametrize( - "input, params, err_type, err_msg", - [ - (X, {"threshold": -1.0}, ValueError, - "threshold == -1.0, must be a positive real number."), - (X, {"threshold": 0.0}, ValueError, - "threshold == 0.0, must be a positive real number."), - - (X, {"branching_factor": 0}, ValueError, - "branching_factor == 0, must be a positive integer greater than 1."), - (X, {"branching_factor": 1}, ValueError, - "branching_factor == 1, must be a positive integer greater than 1."), - (X, {"branching_factor": 1.5}, ValueError, - "min_samples == 1.5, must be an integer."), - (X, {"branching_factor": -2}, ValueError, - "branching_factor == -2, must be a positive integer."), - - (X, {"n_clusters": 0}, ValueError, "n_clusters == 0, must be a positive integer."), - (X, {"n_clusters": 2.5}, ValueError, "n_clusters == 2.5, must be an integer."), - (X, {"n_clusters": -3}, ValueError, - "n_clusters == -2, must be a positive integer."), - ], -) -def test_birch_params_validation(input, params, err_type, err_msg): - """Check the parameters validation in `Birch`.""" - with pytest.raises(err_type, match=err_msg): - Birch(**params).fit(input) - - def test_n_samples_leaves_roots(): # Sanity check for the number of samples in leaves and roots X, y = make_blobs(n_samples=10) @@ -114,7 +85,8 @@ def test_n_clusters(): # Test that the wrong global clustering step raises an Error. 
clf = ElasticNet() brc3 = Birch(n_clusters=clf) - with pytest.raises(ValueError): + err_msg = "n_clusters should be an instance of ClusterMixin or an int" + with pytest.raises(TypeError, match=err_msg): brc3.fit(X) # Test that a small number of clusters raises a warning. @@ -211,3 +183,42 @@ def test_birch_fit_attributes_deprecated(attribute): with pytest.warns(FutureWarning, match=msg): getattr(brc, attribute) + + +@pytest.mark.parametrize( + "params, err_type, err_msg", + [ + ({"threshold": -1.0}, ValueError, "threshold == -1.0, must be > 0.0."), + ({"threshold": 0.0}, ValueError, "threshold == 0.0, must be > 0.0."), + ({"branching_factor": 0}, ValueError, + "branching_factor == 0, must be > 1."), + ({"branching_factor": 1}, ValueError, + "branching_factor == 1, must be > 1."), + ( + {"branching_factor": 1.5}, + TypeError, + "branching_factor must be an instance of <class 'numbers.Integral'>, not" + " <class 'float'>.", + ), + ({"branching_factor": -2}, ValueError, + "branching_factor == -2, must be > 1."), + ({"n_clusters": 0}, ValueError, "n_clusters == 0, must be >= 1."), + ( + {"n_clusters": 2.5}, + TypeError, + "n_clusters must be an instance of <class 'numbers.Integral'>, not <class 'float'>.", + ), + ( + {"n_clusters": "whatever"}, + TypeError, + "n_clusters should be an instance of ClusterMixin or an int", + ), + ({"n_clusters": -3}, ValueError, "n_clusters == -3, must be >= 1."), + ], +) +def test_birch_params_validation(params, err_type, err_msg): + """Check the parameters validation in `Birch`.""" + X, _ = make_blobs(n_samples=80, centers=4) + with pytest.raises(err_type, match=err_msg): + Birch(**params).fit(X) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 592745196d7cd..b593d83d45bb8 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -25,6 +25,35 @@ X = generate_clustered_data(n_clusters=n_clusters) -@pytest.mark.parametrize( - "input, params, err_type, err_msg", - [ - (X, {"eps": -1.0}, ValueError, "eps == -1.0, must be a positive real
number."), - (X, {"eps": 0.0}, ValueError, "eps == 0.0, must be a positive real number."), - - (X, {"min_samples": 0}, ValueError, - "min_samples == 0, must be a positive integer."), - (X, {"min_samples": 1.5}, ValueError, "min_samples == 1.5, must be an integer."), - (X, {"min_samples": -2}, ValueError, - "min_samples == -2, must be a positive integer."), - - (X, {"leaf_size": 0}, ValueError, "leaf_size == 0, must be a positive integer."), - (X, {"leaf_size": 2.5}, ValueError, "leaf_size == 1.5, must be an integer."), - (X, {"leaf_size": -3}, ValueError, - "leaf_size == -2, must be a positive integer."), - - (X, {"p": 0}, ValueError, "p == 0, must be >= 1"), - (X, {"p": -2}, ValueError, "p == -2, must be a positive real number."), - - (X, {"n_jobs": 2.5}, ValueError, "n_jobs == 2.5, must be an integer."), - ], -) -def test_dbscan_params_validation(input, params, err_type, err_msg): - """Check the parameters validation in `DBSCAN`.""" - with pytest.raises(err_type, match=err_msg): - dbscan(**params).fit(input) - - def test_dbscan_similarity(): # Tests the DBSCAN algorithm with a similarity array. # Parameters chosen specifically for this task. 
@@ -454,3 +425,40 @@ def test_dbscan_precomputed_metric_with_initial_rows_zero(): matrix = sparse.csr_matrix(ar) labels = DBSCAN(eps=0.2, metric="precomputed", min_samples=2).fit(matrix).labels_ assert_array_equal(labels, [-1, -1, 0, 0, 0, 1, 1]) + + +@pytest.mark.parametrize( + "params, err_type, err_msg", + [ + ({"eps": -1.0}, ValueError, "eps == -1.0, must be > 0.0."), + ({"eps": 0.0}, ValueError, "eps == 0.0, must be > 0.0."), + ({"min_samples": 0}, ValueError, "min_samples == 0, must be >= 1."), + ( + {"min_samples": 1.5}, + TypeError, + "min_samples must be an instance of , not .", + ), + ({"min_samples": -2}, ValueError, "min_samples == -2, must be >= 1."), + ({"leaf_size": 0}, ValueError, "leaf_size == 0, must be >= 1."), + ( + {"leaf_size": 2.5}, + TypeError, + "leaf_size must be an instance of , not .", + ), + ({"leaf_size": -3}, ValueError, "leaf_size == -3, must be >= 1."), + ({"p": 0}, ValueError, "p == 0, must be >= 1.0."), + ({"p": -2}, ValueError, "p == -2, must be >= 1.0."), + ( + {"n_jobs": 2.5}, + TypeError, + "n_jobs must be an instance of , not .", + ), + ], +) +def test_dbscan_params_validation(params, err_type, err_msg): + """Check the parameters validation in `DBSCAN`.""" + with pytest.raises(err_type, match=err_msg): + DBSCAN(**params).fit(X) From 714e5a02f1f069e481b577bdb4c60e87f588331a Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Mon, 20 Sep 2021 15:34:03 +0530 Subject: [PATCH 08/17] Implemented all the suggested changes --- sklearn/cluster/_birch.py | 33 ++++++++++++++++++++++++---- sklearn/cluster/tests/test_birch.py | 29 ------------------------ sklearn/cluster/tests/test_dbscan.py | 29 ------------------------ 3 files changed, 29 insertions(+), 62 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index ad7340a3dcd06..b418e0b5b790b 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -480,7 +480,7 @@ def __init__( # TODO: Remove in 1.2 # mypy error: Decorated property not 
supported @deprecated( # type: ignore - "`fit_` is deprecated in 1.0 and will be removed in 1.2." + "`fit_` is deprecated in 1.0 and will be removed in 1.2" ) @property def fit_(self): @@ -489,7 +489,7 @@ def fit_(self): # TODO: Remove in 1.2 # mypy error: Decorated property not supported @deprecated( # type: ignore - "`partial_fit_` is deprecated in 1.0 and will be removed in 1.2." + "`partial_fit_` is deprecated in 1.0 and will be removed in 1.2" ) @property def partial_fit_(self): @@ -512,7 +512,32 @@ def fit(self, X, y=None): self Fitted estimator. """ - # TODO: Remove deprecated flags in 1.2 + + check_scalar( + self.threshold, + "threshold", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + check_scalar( + self.branching_factor, + "branching_factor", + target_type=numbers.Integral, + min_val=1, + include_boundaries="neither", + ) + if isinstance(self.n_clusters, numbers.Number): + check_scalar( + self.n_clusters, + "n_clusters", + target_type=numbers.Integral, + min_val=1, + max_val=_num_samples(X), + include_boundaries="both", + ) + + # TODO: Remove deprected flags in 1.2 self._deprecated_fit, self._deprecated_partial_fit = True, False return self._fit(X, partial=False) @@ -614,7 +639,7 @@ def partial_fit(self, X=None, y=None): self Fitted estimator. """ - # TODO: Remove deprecated flags in 1.2 + # TODO: Remove deprected flags in 1.2 self._deprecated_partial_fit, self._deprecated_fit = True, False if X is None: # Perform just the final global clustering step. 
diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index 77cef6504464a..c2e9196af894b 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -19,35 +19,6 @@ from sklearn.utils._testing import assert_array_almost_equal -@pytest.mark.parametrize( - "input, params, err_type, err_msg", - [ - (X, {"threshold": -1.0}, ValueError, - "threshold == -1.0, must be a positive real number."), - (X, {"threshold": 0.0}, ValueError, - "threshold == 0.0, must be a positive real number."), - - (X, {"branching_factor": 0}, ValueError, - "branching_factor == 0, must be a positive integer greater than 1."), - (X, {"branching_factor": 1}, ValueError, - "branching_factor == 1, must be a positive integer greater than 1."), - (X, {"branching_factor": 1.5}, ValueError, - "min_samples == 1.5, must be an integer."), - (X, {"branching_factor": -2}, ValueError, - "branching_factor == -2, must be a positive integer."), - - (X, {"n_clusters": 0}, ValueError, "n_clusters == 0, must be a positive integer."), - (X, {"n_clusters": 2.5}, ValueError, "n_clusters == 2.5, must be an integer."), - (X, {"n_clusters": -3}, ValueError, - "n_clusters == -2, must be a positive integer."), - ], -) -def test_birch_params_validation(input, params, err_type, err_msg): - """Check the parameters validation in `Birch`.""" - with pytest.raises(err_type, match=err_msg): - Birch(**params).fit(input) - - def test_n_samples_leaves_roots(): # Sanity check for the number of samples in leaves and roots X, y = make_blobs(n_samples=10) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 543ef59bc27c3..b593d83d45bb8 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -25,35 +25,6 @@ X = generate_clustered_data(n_clusters=n_clusters) -@pytest.mark.parametrize( - "input, params, err_type, err_msg", - [ - (X, {"eps": -1.0}, ValueError, "eps == -1.0, must be a positive 
real number."), - (X, {"eps": 0.0}, ValueError, "eps == 0.0, must be a positive real number."), - - (X, {"min_samples": 0}, ValueError, - "min_samples == 0, must be a positive integer."), - (X, {"min_samples": 1.5}, ValueError, "min_samples == 1.5, must be an integer."), - (X, {"min_samples": -2}, ValueError, - "min_samples == -2, must be a positive integer."), - - (X, {"leaf_size": 0}, ValueError, "leaf_size == 0, must be a positive integer."), - (X, {"leaf_size": 2.5}, ValueError, "leaf_size == 1.5, must be an integer."), - (X, {"leaf_size": -3}, ValueError, - "leaf_size == -2, must be a positive integer."), - - (X, {"p": 0}, ValueError, "p == 0, must be >= 1"), - (X, {"p": -2}, ValueError, "p == -2, must be a positive real number."), - - (X, {"n_jobs": 2.5}, ValueError, "n_jobs == 2.5, must be an integer."), - ], -) -def test_dbscan_params_validation(input, params, err_type, err_msg): - """Check the parameters validation in `DBSCAN`.""" - with pytest.raises(err_type, match=err_msg): - dbscan(**params).fit(input) - - def test_dbscan_similarity(): # Tests the DBSCAN algorithm with a similarity array. # Parameters chosen specifically for this task. From 87c582f4af573f802de92904837e01cefa29204b Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Mon, 20 Sep 2021 15:41:46 +0530 Subject: [PATCH 09/17] Corrected the typo --- sklearn/cluster/_birch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index b418e0b5b790b..689801bb7c055 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -639,7 +639,7 @@ def partial_fit(self, X=None, y=None): self Fitted estimator. """ - # TODO: Remove deprected flags in 1.2 + # TODO: Remove deprecated flags in 1.2 self._deprecated_partial_fit, self._deprecated_fit = True, False if X is None: # Perform just the final global clustering step. 
From 4136d7bef4c080a95c5c2224a881f526cf3bdbce Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Thu, 23 Sep 2021 18:21:19 +0530 Subject: [PATCH 10/17] Reformatted the file to resolve failing tests --- sklearn/cluster/tests/test_birch.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index c2e9196af894b..5d8a3222ef156 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -190,18 +190,15 @@ def test_birch_fit_attributes_deprecated(attribute): [ ({"threshold": -1.0}, ValueError, "threshold == -1.0, must be > 0.0."), ({"threshold": 0.0}, ValueError, "threshold == 0.0, must be > 0.0."), - ({"branching_factor": 0}, ValueError, - "branching_factor == 0, must be > 1."), - ({"branching_factor": 1}, ValueError, - "branching_factor == 1, must be > 1."), + ({"branching_factor": 0}, ValueError, "branching_factor == 0, must be > 1."), + ({"branching_factor": 1}, ValueError, "branching_factor == 1, must be > 1."), ( {"branching_factor": 1.5}, TypeError, "branching_factor must be an instance of , not" " .", ), - ({"branching_factor": -2}, ValueError, - "branching_factor == -2, must be > 1."), + ({"branching_factor": -2}, ValueError, "branching_factor == -2, must be > 1."), ({"n_clusters": 0}, ValueError, "n_clusters == 0, must be >= 1."), ( {"n_clusters": 2.5}, From 93cb702bb15c706f34910720f1acf29d12baab4f Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 23 Sep 2021 15:41:13 +0200 Subject: [PATCH 11/17] Apply suggestions from code review --- sklearn/cluster/_birch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 689801bb7c055..a504847edac78 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -480,7 +480,7 @@ def __init__( # TODO: Remove in 1.2 # mypy error: Decorated property not supported @deprecated( # type: ignore - 
"`fit_` is deprecated in 1.0 and will be removed in 1.2" + "`fit_` is deprecated in 1.0 and will be removed in 1.2." ) @property def fit_(self): @@ -489,7 +489,7 @@ def fit_(self): # TODO: Remove in 1.2 # mypy error: Decorated property not supported @deprecated( # type: ignore - "`partial_fit_` is deprecated in 1.0 and will be removed in 1.2" + "`partial_fit_` is deprecated in 1.0 and will be removed in 1.2." ) @property def partial_fit_(self): From 3cfbd8cdfe69db49d8408e552be5c947f15f6cd4 Mon Sep 17 00:00:00 2001 From: Sanjay Marreddi <57671048+SanjayMarreddi@users.noreply.github.com> Date: Tue, 5 Oct 2021 00:27:04 +0530 Subject: [PATCH 12/17] Removed max_val check as suggested. Co-authored-by: Thomas J. Fan --- sklearn/cluster/_birch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index a504847edac78..17af9e37d5e93 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -533,7 +533,6 @@ def fit(self, X, y=None): "n_clusters", target_type=numbers.Integral, min_val=1, - max_val=_num_samples(X), include_boundaries="both", ) From c83ee0e4a373f07000d9687cd30c46311cdce321 Mon Sep 17 00:00:00 2001 From: Sanjay Marreddi <57671048+SanjayMarreddi@users.noreply.github.com> Date: Tue, 5 Oct 2021 00:28:10 +0530 Subject: [PATCH 13/17] Corrected the value of min_val Co-authored-by: Thomas J. 
Fan --- sklearn/cluster/_dbscan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 3422a2074257e..72c9a3827807e 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -386,7 +386,7 @@ def fit(self, X, y=None, sample_weight=None): self.p, "p", target_type=numbers.Real, - min_val=1.0, + min_val=0.0, include_boundaries="left", ) if self.n_jobs is not None: From 65755879fda66125b8a73ad4574cf7530dd76e53 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Tue, 5 Oct 2021 12:17:40 +0530 Subject: [PATCH 14/17] Added comments and used Linting --- sklearn/cluster/_birch.py | 1 + sklearn/cluster/_dbscan.py | 1 + 2 files changed, 2 insertions(+) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 17af9e37d5e93..13b28059cd0b0 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -513,6 +513,7 @@ def fit(self, X, y=None): Fitted estimator. """ + # Validating the scalar parameters. check_scalar( self.threshold, "threshold", diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 72c9a3827807e..0994248e01697 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -360,6 +360,7 @@ def fit(self, X, y=None, sample_weight=None): warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning) X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place + # Validating the scalar parameters. 
check_scalar( self.eps, "eps", From 6e4b64928bc3587394d5901db60962d5f2c021a1 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Tue, 5 Oct 2021 12:21:50 +0530 Subject: [PATCH 15/17] Removed unnecessary imports --- sklearn/cluster/_birch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 13b28059cd0b0..6ae234d179234 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -14,7 +14,7 @@ from ..base import TransformerMixin, ClusterMixin, BaseEstimator from ..utils.extmath import row_norms from ..utils import check_scalar, deprecated -from ..utils.validation import _num_samples, check_is_fitted +from ..utils.validation import check_is_fitted from ..exceptions import ConvergenceWarning from . import AgglomerativeClustering from .._config import config_context From 545ebd050d7ee63bea318def2894351b61da3676 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Tue, 5 Oct 2021 14:39:05 +0530 Subject: [PATCH 16/17] Made corrections according to p limits --- sklearn/cluster/tests/test_dbscan.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index b593d83d45bb8..40949e81a24b1 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -448,8 +448,7 @@ def test_dbscan_precomputed_metric_with_initial_rows_zero(): " 'float'>.", ), ({"leaf_size": -3}, ValueError, "leaf_size == -3, must be >= 1."), - ({"p": 0}, ValueError, "p == 0, must be >= 1.0."), - ({"p": -2}, ValueError, "p == -2, must be >= 1.0."), + ({"p": -2}, ValueError, "p == -2, must be >= 0.0."), ( {"n_jobs": 2.5}, TypeError, From 468c5f215d4f6d4c6b2572ba4674991c2a80e513 Mon Sep 17 00:00:00 2001 From: SanjayMarreddi Date: Sat, 9 Oct 2021 15:23:24 +0530 Subject: [PATCH 17/17] Removed default arg of include_boundaries --- sklearn/cluster/_birch.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 6ae234d179234..b1f3fb5a64a74 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -534,7 +534,6 @@ def fit(self, X, y=None): "n_clusters", target_type=numbers.Integral, min_val=1, - include_boundaries="both", ) # TODO: Remove deprected flags in 1.2