[MRG + 1] Bug fix for unnormalized laplacian by yanlend · Pull Request #4995 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[MRG + 1] Bug fix for unnormalized laplacian #4995

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 19, 2015
Merged
7 changes: 6 additions & 1 deletion doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Enhancements

Bug fixes
.........
- Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
Laplacian matrix was incorrectly set to 1. By `Peter Fischer`_.


API changes summary
-------------------
Expand Down Expand Up @@ -261,7 +264,7 @@ Bug fixes
in the final fit. By `Manoj Kumar`_.

- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing
oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.
oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.

- All regressors now consistently handle and warn when given ``y`` that is of
shape ``(n_samples, 1)``. By `Andreas Müller`_.
Expand Down Expand Up @@ -3772,6 +3775,8 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.

.. _Loic Esteve: https://github.com/lesteve

.. _Peter Fischer: https://github.com/yanlend

.. _Brian McFee: https://bmcfee.github.io

.. _Vighnesh Birodkar: https://github.com/vighneshbirodkar
Expand Down
18 changes: 11 additions & 7 deletions sklearn/manifold/spectral_embedding_.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _graph_is_connected(graph):
return _graph_connected_component(graph, 0).sum() == graph.shape[0]


def _set_diag(laplacian, value):
def _set_diag(laplacian, value, norm_laplacian):
"""Set the diagonal of the laplacian matrix and convert it to a
sparse format well suited for eigenvalue decomposition

Expand All @@ -88,6 +88,8 @@ def _set_diag(laplacian, value):
The graph laplacian
value : float
The value of the diagonal
norm_laplacian : bool
Whether the value of the diagonal should be changed or not

Returns
-------
Expand All @@ -99,11 +101,13 @@ def _set_diag(laplacian, value):
n_nodes = laplacian.shape[0]
# We need all entries in the diagonal to values
if not sparse.isspmatrix(laplacian):
laplacian.flat[::n_nodes + 1] = value
if norm_laplacian:
laplacian.flat[::n_nodes + 1] = value
else:
laplacian = laplacian.tocoo()
diag_idx = (laplacian.row == laplacian.col)
laplacian.data[diag_idx] = value
if norm_laplacian:
diag_idx = (laplacian.row == laplacian.col)
laplacian.data[diag_idx] = value
# If the matrix has a small number of diagonals (as in the
# case of structured matrices coming from images), the
# dia format might be best suited for matvec products:
Expand Down Expand Up @@ -229,7 +233,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
# /lobpcg/lobpcg.py#L237
# or matlab:
# http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
laplacian = _set_diag(laplacian, 1)
laplacian = _set_diag(laplacian, 1, norm_laplacian)

# Here we'll use shift-invert mode for fast eigenvalues
# (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
Expand Down Expand Up @@ -268,7 +272,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64,
accept_sparse=True)
laplacian = _set_diag(laplacian, 1)
laplacian = _set_diag(laplacian, 1, norm_laplacian)
ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
M = ml.aspreconditioner()
X = random_state.rand(laplacian.shape[0], n_components + 1)
Expand All @@ -292,7 +296,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
lambdas, diffusion_map = eigh(laplacian)
embedding = diffusion_map.T[:n_components] * dd
else:
laplacian = _set_diag(laplacian, 1)
laplacian = _set_diag(laplacian, 1, norm_laplacian)
# We increase the number of eigenvectors requested, as lobpcg
# doesn't behave well in low dimension
X = random_state.rand(laplacian.shape[0], n_components + 1)
Expand Down
23 changes: 23 additions & 0 deletions sklearn/manifold/tests/test_spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from scipy.sparse import csr_matrix
from scipy.sparse import csc_matrix
from scipy.linalg import eigh
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal

Expand All @@ -16,6 +17,8 @@
from sklearn.metrics import normalized_mutual_info_score
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs
from sklearn.utils.graph import graph_laplacian
from sklearn.utils.extmath import _deterministic_vector_sign_flip


# non centered, sparse centers to check the
Expand Down Expand Up @@ -192,3 +195,23 @@ def test_spectral_embedding_deterministic():
embedding_1 = spectral_embedding(sims)
embedding_2 = spectral_embedding(sims)
assert_array_almost_equal(embedding_1, embedding_2)


def test_spectral_embedding_unnormalized():
    # Check that spectral_embedding also processes the unnormalized
    # graph laplacian correctly (norm_laplacian=False).
    rng = np.random.RandomState(36)
    data = rng.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims,
                                     norm_laplacian=False,
                                     n_components=n_components,
                                     drop_first=False)

    # Reference embedding: dense eigendecomposition of the
    # unnormalized laplacian computed by hand.
    laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components] * dd
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)
0