8000 Merge pull request #4995 from yanlend/patch-1 · scikit-learn/scikit-learn@3ab597c · GitHub
[go: up one dir, main page]

Skip to content

Commit 3ab597c

Browse files
committed
Merge pull request #4995 from yanlend/patch-1
[MRG + 1] Bug fix for unnormalized laplacian
2 parents 40b541e + 56796e4 commit 3ab597c

File tree

3 files changed

+40
-8
lines changed

3 files changed

+40
-8
lines changed

doc/whats_new.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ Enhancements
2020

2121
Bug fixes
2222
.........
23+
- Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
24+
Laplacian matrix was incorrectly set to 1. By `Peter Fischer`_.
25+
2326

2427
API changes summary
2528
-------------------
@@ -261,7 +264,7 @@ Bug fixes
261264
in the final fit. By `Manoj Kumar`_.
262265

263266
- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing
264-
oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.
267+
oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.
265268

266269
- All regressors now consistently handle and warn when given ``y`` that is of
267270
shape ``(n_samples, 1)``. By `Andreas Müller`_.
@@ -3772,6 +3775,8 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
37723775

37733776
.. _Loic Esteve: https://github.com/lesteve
37743777

3778+
.. _Peter Fischer: https://github.com/yanlend
3779+
37753780
.. _Brian McFee: https://bmcfee.github.io
37763781

37773782
.. _Vighnesh Birodkar: https://github.com/vighneshbirodkar

sklearn/manifold/spectral_embedding_.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def _graph_is_connected(graph):
7878
return _graph_connected_component(graph, 0).sum() == graph.shape[0]
7979

8080

81-
def _set_diag(laplacian, value):
81+
def _set_diag(laplacian, value, norm_laplacian):
8282
"""Set the diagonal of the laplacian matrix and convert it to a
8383
sparse format well suited for eigenvalue decomposition
8484
@@ -88,6 +88,8 @@ def _set_diag(laplacian, value):
8888
The graph laplacian
8989
value : float
9090
The value of the diagonal
91+
norm_laplacian : bool
92+
Whether the value of the diagonal should be changed or not
9193
9294
Returns
9395
-------
@@ -99,11 +101,13 @@ def _set_diag(laplacian, value):
99101
n_nodes = laplacian.shape[0]
100102
# We need all entries in the diagonal to values
101103
if not sparse.isspmatrix(laplacian):
102-
laplacian.flat[::n_nodes + 1] = value
104+
if norm_laplacian:
105+
laplacian.flat[::n_nodes + 1] = value
103106
else:
104107
laplacian = laplacian.tocoo()
105-
diag_idx = (laplacian.row == laplacian.col)
106-
laplacian.data[diag_idx] = value
108+
if norm_laplacian:
109+
diag_idx = (laplacian.row == laplacian.col)
110+
laplacian.data[diag_idx] = value
107111
# If the matrix has a small number of diagonals (as in the
108112
# case of structured matrices coming from images), the
109113
# dia format might be best suited for matvec products:
@@ -229,7 +233,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
229233
# /lobpcg/lobpcg.py#L237
230234
# or matlab:
231235
# http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
232-
laplacian = _set_diag(laplacian, 1)
236+
laplacian = _set_diag(laplacian, 1, norm_laplacian)
233237

234238
# Here we'll use shift-invert mode for fast eigenvalues
235239
# (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
@@ -268,7 +272,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
268272
# lobpcg needs double precision floats
269273
laplacian = check_array(laplacian, dtype=np.float64,
270274
accept_sparse=True)
271-
laplacian = _set_diag(laplacian, 1)
275+
laplacian = _set_diag(laplacian, 1, norm_laplacian)
272276
ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
273277
M = ml.aspreconditioner()
274278
X = random_state.rand(laplacian.shape[0], n_components + 1)
@@ -292,7 +296,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
292296
lambdas, diffusion_map = eigh(laplacian)
293297
embedding = diffusion_map.T[:n_components] * dd
294298
else:
295-
laplacian = _set_diag(laplacian, 1)
299+
laplacian = _set_diag(laplacian, 1, norm_laplacian)
296300
# We increase the number of eigenvectors requested, as lobpcg
297301
# doesn't behave well in low dimension
298302
X = random_state.rand(laplacian.shape[0], n_components + 1)

sklearn/manifold/tests/test_spectral_embedding.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from scipy.sparse import csr_matrix
55
from scipy.sparse import csc_matrix
6+
from scipy.linalg import eigh
67
import numpy as np
78
from numpy.testing import assert_array_almost_equal, assert_array_equal
89

@@ -16,6 +17,8 @@
1617
from sklearn.metrics import normalized_mutual_info_score
1718
from sklearn.cluster import KMeans
1819
from sklearn.datasets.samples_generator import make_blobs
20+
from sklearn.utils.graph import graph_laplacian
21+
from sklearn.utils.extmath import _deterministic_vector_sign_flip
1922

2023

2124
# non centered, sparse centers to check the
@@ -192,3 +195,23 @@ def test_spectral_embedding_deterministic():
192195
embedding_1 = spectral_embedding(sims)
193196
embedding_2 = spectral_embedding(sims)
194197
assert_array_almost_equal(embedding_1, embedding_2)
198+
199+
200+
def test_spectral_embedding_unnormalized():
201+
# Test that spectral_embedding is also processing unnormalized laplacian correctly
202+
random_state = np.random.RandomState(36)
203+
data = random_state.randn(10, 30)
204+
sims = rbf_kernel(data)
205+
n_components = 8
206+
embedding_1 = spectral_embedding(sims,
207+
norm_laplacian=False,
208+
n_components=n_components,
209+
drop_first=False)
210+
211+
# Verify using manual computation with dense eigh
212+
laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
213+
_, diffusion_map = eigh(laplacian)
214+
embedding_2 = diffusion_map.T[:n_components] * dd
215+
embedding_2 = _deterministic_vector_sign_flip(embedding_2).T
216+
217+
assert_array_almost_equal(embedding_1, embedding_2)

0 commit comments

Comments
 (0)
0