ENH Add verbose option to SpectralClustering (#18052) · simonamaggio/scikit-learn@603d05b · GitHub
[go: up one dir, main page]

Skip to content

Commit 603d05b

Browse files
ENH Add verbose option to SpectralClustering (scikit-learn#18052)
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
1 parent eff1bdf commit 603d05b

File tree

3 files changed

+43
-4
lines changed

3 files changed

+43
-4
lines changed

doc/whats_new/v0.24.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ Changelog
7272
:user:`Emilie Delattre <EmilieDel>`, and
7373
:user:`Francesco Casalegno <FrancescoCasalegno>`.
7474

75+
- |Enhancement| :class:`cluster.SpectralClustering` and
76+
:func:`cluster.spectral_clustering` have a new keyword argument `verbose`.
77+
When set to `True`, additional messages will be displayed which can aid with
78+
debugging. :pr:`18052` by :user:`Sean O. Stalley <sstalley>`.
79+
7580
- |API| :class:`cluster.MiniBatchKMeans` attributes, `counts_` and
7681
`init_size_`, are deprecated and will be removed in 0.26. :pr:`17864` by
7782
:user:`Jérémie du Boisberranger <jeremiedbb>`.

sklearn/cluster/_spectral.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ def discretize(vectors, *, copy=True, max_svd_restarts=30, n_iter_max=20,
160160
@_deprecate_positional_args
161161
def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
162162
eigen_solver=None, random_state=None, n_init=10,
163-
eigen_tol=0.0, assign_labels='kmeans'):
163+
eigen_tol=0.0, assign_labels='kmeans',
164+
verbose=False):
164165
"""Apply clustering to a projection of the normalized Laplacian.
165166
166167
In practice Spectral Clustering is very useful when the structure of
@@ -222,6 +223,11 @@ def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
222223
the 'Multiclass spectral clustering' paper referenced below for
223224
more details on the discretization approach.
224225
226+
verbose : bool, default=False
227+
Verbosity mode.
228+
229+
.. versionadded:: 0.24
230+
225231
Returns
226232
-------
227233
labels : array of integers, shape: n_samples
@@ -265,10 +271,12 @@ def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
265271
eigen_solver=eigen_solver,
266272
random_state=random_state,
267273
eigen_tol=eigen_tol, drop_first=False)
274+
if verbose:
275+
print(f'Computing label assignment using {assign_labels}')
268276

269277
if assign_labels == 'kmeans':
270278
_, labels, _ = k_means(maps, n_clusters, random_state=random_state,
271-
n_init=n_init)
279+
n_init=n_init, verbose=verbose)
272280
else:
273281
labels = discretize(maps, random_state=random_state)
274282

@@ -381,6 +389,11 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
381389
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
382390
for more details.
383391
392+
verbose : bool, default=False
393+
Verbosity mode.
394+
395+
.. versionadded:: 0.24
396+
384397
Attributes
385398
----------
386399
affinity_matrix_ : array-like of shape (n_samples, n_samples)
@@ -443,7 +456,8 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
443456
def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,
444457
random_state=None, n_init=10, gamma=1., affinity='rbf',
445458
n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans',
446-
degree=3, coef0=1, kernel_params=None, n_jobs=None):
459+
degree=3, coef0=1, kernel_params=None, n_jobs=None,
460+
verbose=False):
447461
self.n_clusters = n_clusters
448462
self.eigen_solver = eigen_solver
449463
self.n_components = n_components
@@ -458,6 +472,7 @@ def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,
458472
self.coef0 = coef0
459473
self.kernel_params = kernel_params
460474
self.n_jobs = n_jobs
475+
self.verbose = verbose
461476

462477
def fit(self, X, y=None):
463478
"""Perform spectral clustering from features, or affinity matrix.
@@ -523,7 +538,8 @@ def fit(self, X, y=None):
523538
random_state=random_state,
524539
n_init=self.n_init,
525540
eigen_tol=self.eigen_tol,
526-
assign_labels=self.assign_labels)
541+
assign_labels=self.assign_labels,
542+
verbose=self.verbose)
527543
return self
528544

529545
def fit_predict(self, X, y=None):

sklearn/cluster/tests/test_spectral.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Testing for Spectral Clustering methods"""
2+
import re
23

34
import numpy as np
45
from scipy import sparse
@@ -248,3 +249,20 @@ def test_n_components():
248249
labels_diff_ncomp = SpectralClustering(n_components=2,
249250
random_state=0).fit(X).labels_
250251
assert not np.array_equal(labels, labels_diff_ncomp)
252+
253+
254+
@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize'))
def test_verbose(assign_labels, capsys):
    """Check that ``verbose`` makes SpectralClustering print progress messages.

    Parameters
    ----------
    assign_labels : str
        Label assignment strategy under test; forwarded to the estimator so
        that each parametrized case exercises its own code path.
    capsys : fixture
        pytest fixture used to capture what is written to stdout.
    """
    # Check verbose mode of KMeans for better coverage.
    # Two tight blobs centred on (1, 1) and (-1, -1); the labels are unused.
    X, _ = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    # The strategy must be passed explicitly: without it both parametrized
    # cases would fall back to the default and only test 'kmeans'.
    SpectralClustering(n_clusters=2, random_state=42, verbose=1,
                       assign_labels=assign_labels).fit(X)

    captured = capsys.readouterr()

    # The announcement names the strategy in use, so matching on it confirms
    # the requested code path was actually taken.
    assert re.search(rf"Computing label assignment using {assign_labels}",
                     captured.out)

    if assign_labels == "kmeans":
        # Extra messages expected only when verbosity is forwarded to k-means.
        assert re.search(r"Initialization complete", captured.out)
        assert re.search(r"Iteration [0-9]+, inertia", captured.out)

0 commit comments

Comments
 (0)
0