MAINT refactor spectral_clustering to call SpectralClustering (#25392) · dolfly/scikit-learn@1f12941 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1f12941

Browse files
authored
MAINT refactor spectral_clustering to call SpectralClustering (scikit-learn#25392)
1 parent 9956210 commit 1f12941

File tree

2 files changed

+37
-46
lines changed

2 files changed

+37
-46
lines changed

sklearn/cluster/_spectral.py

Lines changed: 36 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,6 @@ def discretize(
139139
# If there is an exception we try to randomize and rerun SVD again
140140
# do this max_svd_restarts times.
141141
while (svd_restarts < max_svd_restarts) and not has_converged:
142-
143142
# Initialize first column of rotation matrix with a row of the
144143
# eigenvectors
145144
rotation = np.zeros((n_components, n_components))
@@ -345,50 +344,20 @@ def spectral_clustering(
345344
David Zhuzhunashvili, Andrew Knyazev
346345
<10.1109/HPEC.2017.8091045>`
347346
"""
348-
if assign_labels not in ("kmeans", "discretize", "cluster_qr"):
349-
raise ValueError(
350-
"The 'assign_labels' parameter should be "
351-
"'kmeans' or 'discretize', or 'cluster_qr', "
352-
f"but {assign_labels!r} was given"
353-
)
354-
if isinstance(affinity, np.matrix):
355-
raise TypeError(
356-
"spectral_clustering does not support passing in affinity as an "
357-
"np.matrix. Please convert to a numpy array with np.asarray. For "
358-
"more information see: "
359-
"https://numpy.org/doc/stable/reference/generated/numpy.matrix.html", # noqa
360-
)
361347

362-
random_state = check_random_state(random_state)
363-
n_components = n_clusters if n_components is None else n_components
364-
365-
# We now obtain the real valued solution matrix to the
366-
# relaxed Ncut problem, solving the eigenvalue problem
367-
# L_sym x = lambda x and recovering u = D^-1/2 x.
368-
# The first eigenvector is constant only for fully connected graphs
369-
# and should be kept for spectral clustering (drop_first = False)
370-
# See spectral_embedding documentation.
371-
maps = spectral_embedding(
372-
affinity,
348+
clusterer = SpectralClustering(
349+
n_clusters=n_clusters,
373350
n_components=n_components,
374351
eigen_solver=eigen_solver,
375352
random_state=random_state,
353+
n_init=n_init,
354+
affinity="precomputed",
376355
eigen_tol=eigen_tol,
377-
drop_first=False,
378-
)
379-
if verbose:
380-
print(f"Computing label assignment using {assign_labels}")
381-
382-
if assign_labels == "kmeans":
383-
_, labels, _ = k_means(
384-
maps, n_clusters, random_state=random_state, n_init=n_init, verbose=verbose
385-
)
386-
elif assign_labels == "cluster_qr":
387-
labels = cluster_qr(maps)
388-
else:
389-
labels = discretize(maps, random_state=random_state)
356+
assign_labels=assign_labels,
357+
verbose=verbose,
358+
).fit(affinity)
390359

391-
return labels
360+
return clusterer.labels_
392361

393362

394363
class SpectralClustering(ClusterMixin, BaseEstimator):
@@ -747,17 +716,39 @@ def fit(self, X, y=None):
747716
)
748717

749718
random_state = check_random_state(self.random_state)
750-
self.labels_ = spectral_clustering(
719+
n_components = (
720+
self.n_clusters if self.n_components is None else self.n_components
721+
)
722+
# We now obtain the real valued solution matrix to the
723+
# relaxed Ncut problem, solving the eigenvalue problem
724+
# L_sym x = lambda x and recovering u = D^-1/2 x.
725+
# The first eigenvector is constant only for fully connected graphs
726+
# and should be kept for spectral clustering (drop_first = False)
727+
# See spectral_embedding documentation.
728+
maps = spectral_embedding(
751729
self.affinity_matrix_,
752-
n_clusters=self.n_clusters,
753-
n_components=self.n_components,
730+
n_components=n_components,
754731
eigen_solver=self.eigen_solver,
755732
random_state=random_state,
756-
n_init=self.n_init,
757733
eigen_tol=self.eigen_tol,
758-
assign_labels=self.assign_labels,
759-
verbose=self.verbose,
734+
drop_first=False,
760735
)
736+
if self.verbose:
737+
print(f"Computing label assignment using {self.assign_labels}")
738+
739+
if self.assign_labels == "kmeans":
740+
_, self.labels_, _ = k_means(
741+
maps,
742+
self.n_clusters,
743+
random_state=random_state,
744+
n_init=self.n_init,
745+
verbose=self.verbose,
746+
)
747+
elif self.assign_labels == "cluster_qr":
748+
self.labels_ = cluster_qr(maps)
749+
else:
750+
self.labels_ = discretize(maps, random_state=random_state)
751+
761752
return self
762753

763754
def fit_predict(self, X, y=None):

sklearn/cluster/tests/test_spectral.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -309,7 +309,7 @@ def test_spectral_clustering_np_matrix_raises():
309309
a np.matrix. See #10993"""
310310
X = np.matrix([[0.0, 2.0], [2.0, 0.0]])
311311

312-
msg = r"spectral_clustering does not support passing in affinity as an np\.matrix"
312+
msg = r"np\.matrix is not supported. Please convert to a numpy array"
313313
with pytest.raises(TypeError, match=msg):
314314
spectral_clustering(X)
315315

0 commit comments

Comments
 (0)
0