ENH/FIX add a lobpcg solver to spectral embedding · seckcoder/scikit-learn@2bb7b91 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2bb7b91

Browse files
committed
ENH/FIX add a lobpcg solver to spectral embedding
This makes the solver a bit better behaved on some problems
1 parent 3f9d73d commit 2bb7b91

File tree

2 files changed

+42
-24
lines changed

2 files changed

+42
-24
lines changed

sklearn/cluster/spectral.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def spectral_embedding(adjacency, n_components=8, mode=None,
4141
n_components: integer, optional
4242
The dimension of the projection subspace.
4343
44-
mode: {None, 'arpack' or 'amg'}
44+
mode: {None, 'arpack', 'lobpcg', or 'amg'}
4545
The eigenvalue decomposition strategy to use. AMG requires pyamg
4646
to be installed. It can be faster on very large, sparse problems,
4747
but may also lead to instabilities
@@ -78,6 +78,9 @@ def spectral_embedding(adjacency, n_components=8, mode=None,
7878
# XXX: Should we check that the matrices given is symmetric
7979
if mode is None:
8080
mode = 'arpack'
81+
elif not mode in ('arpack', 'lobpcg', 'amg'):
82+
raise ValueError("Unknown value for mode: '%s'."
83+
"Should be 'amg' or 'arpack'" % mode)
8184
laplacian, dd = graph_laplacian(adjacency,
8285
normed=True, return_diag=True)
8386
if (mode == 'arpack'
@@ -118,25 +121,39 @@ def spectral_embedding(adjacency, n_components=8, mode=None,
118121
# near 1.0 and leads to much faster convergence: potentially an
119122
# orders-of-magnitude speedup over simply using keyword which='LA'
120123
# in standard mode.
121-
lambdas, diffusion_map = eigsh(-laplacian, k=n_components,
122-
sigma=1.0, which='LM')
123-
embedding = diffusion_map.T[::-1] * dd
124-
elif mode == 'amg':
124+
try:
125+
lambdas, diffusion_map = eigsh(-laplacian, k=n_components,
126+
sigma=1.0, which='LM')
127+
embedding = diffusion_map.T[::-1] * dd
128+
except RuntimeError:
129+
# When submatrices are exactly singular, an LU decomposition
130+
# in arpack fails. We fallback to lobpcg
131+
mode = "lobpcg"
132+
133+
if mode == 'amg':
125134
# Use AMG to get a preconditioner and speed up the eigenvalue
126135
# problem.
127136
laplacian = laplacian.astype(np.float) # lobpcg needs native floats
128137
ml = smoothed_aggregation_solver(laplacian.tocsr())
138+
M = ml.aspreconditioner()
129139
X = random_state.rand(laplacian.shape[0], n_components)
130140
X[:, 0] = 1. / dd.ravel()
131-
M = ml.aspreconditioner()
132141
lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
133142
largest=False)
134143
embedding = diffusion_map.T * dd
135144
if embedding.shape[0] == 1:
136145
raise ValueError
137-
else:
138-
raise ValueError("Unknown value for mode: '%s'."
139-
"Should be 'amg' or 'arpack'" % mode)
146+
elif mode == "lobpcg":
147+
# We increase the number of eigenvectors requested, as lobpcg
148+
# doesn't behave well in low dimension
149+
X = random_state.rand(laplacian.shape[0], n_components + 4)
150+
X[:, 0] = 1. / dd.ravel()
151+
lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
152+
largest=False, maxiter=2000,
153+
verbosityLevel=20)
154+
embedding = diffusion_map.T[:n_components] * dd
155+
if embedding.shape[0] == 1:
156+
raise ValueError
140157
return embedding
141158

142159

sklearn/cluster/tests/test_spectral.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,22 @@ def test_spectral_clustering():
2525
[0, 0, 0, 1, 2, 4, 1],
2626
])
2727

28-
for mat in (S, sparse.csr_matrix(S)):
29-
model = SpectralClustering(random_state=0, n_clusters=2,
30-
affinity='precomputed').fit(mat)
31-
labels = model.labels_
32-
if labels[0] == 0:
33-
labels = 1 - labels
34-
35-
assert_equal(labels, [1, 1, 1, 0, 0, 0, 0])
36-
37-
model_copy = loads(dumps(model))
38-
assert_equal(model_copy.n_clusters, model.n_clusters)
39-
assert_equal(model_copy.mode, model.mode)
40-
assert_equal(model_copy.random_state.get_state(),
41-
model.random_state.get_state())
42-
assert_equal(model_copy.labels_, model.labels_)
28+
for mode in ('arpack', 'lobpcg'):
29+
for mat in (S, sparse.csr_matrix(S)):
30+
model = SpectralClustering(random_state=0, n_clusters=2,
31+
affinity='precomputed', mode=mode).fit(mat)
32+
labels = model.labels_
33+
if labels[0] == 0:
34+
labels = 1 - labels
35+
36+
assert_equal(labels, [1, 1, 1, 0, 0, 0, 0])
37+
38+
model_copy = loads(dumps(model))
39+
assert_equal(model_copy.n_clusters, model.n_clusters)
40+
assert_equal(model_copy.mode, model.mode)
41+
assert_equal(model_copy.random_state.get_state(),
42+
model.random_state.get_state())
43+
assert_equal(model_copy.labels_, model.labels_)
4344

4445

4546
def test_spectral_amg_mode():

0 commit comments

Comments
 (0)
0