diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index e468ddd4093d1..b5d988b446ab9 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -20,6 +20,9 @@ Enhancements
 Bug fixes
 .........
 
+    - Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
+      Laplacian matrix was incorrectly set to 1. By `Peter Fischer`_.
+
 API changes summary
 -------------------
 
@@ -261,7 +264,7 @@ Bug fixes
       in the final fit. By `Manoj Kumar`_.
 
     - Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing
-      oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.
+      oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.
 
     - All regressors now consistently handle and warn when given ``y`` that is of
       shape ``(n_samples, 1)``. By `Andreas Müller`_.
@@ -3772,6 +3775,8 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 
 .. _Loic Esteve: https://github.com/lesteve
 
+.. _Peter Fischer: https://github.com/yanlend
+
 .. _Brian McFee: https://bmcfee.github.io
 
 .. _Vighnesh Birodkar: https://github.com/vighneshbirodkar
diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py
index 26825d8deb933..b610d95496e90 100644
--- a/sklearn/manifold/spectral_embedding_.py
+++ b/sklearn/manifold/spectral_embedding_.py
@@ -78,7 +78,7 @@ def _graph_is_connected(graph):
     return _graph_connected_component(graph, 0).sum() == graph.shape[0]
 
 
-def _set_diag(laplacian, value):
+def _set_diag(laplacian, value, norm_laplacian):
     """Set the diagonal of the laplacian matrix and convert it to a
     sparse format well suited for eigenvalue decomposition
 
@@ -88,6 +88,8 @@ def _set_diag(laplacian, value):
         The graph laplacian
     value : float
         The value of the diagonal
+    norm_laplacian : bool
+        Whether the value of the diagonal should be changed or not
 
     Returns
     -------
@@ -99,11 +101,13 @@ def _set_diag(laplacian, value):
     n_nodes = laplacian.shape[0]
     # We need all entries in the diagonal to values
     if not sparse.isspmatrix(laplacian):
-        laplacian.flat[::n_nodes + 1] = value
+        if norm_laplacian:
+            laplacian.flat[::n_nodes + 1] = value
     else:
         laplacian = laplacian.tocoo()
-        diag_idx = (laplacian.row == laplacian.col)
-        laplacian.data[diag_idx] = value
+        if norm_laplacian:
+            diag_idx = (laplacian.row == laplacian.col)
+            laplacian.data[diag_idx] = value
         # If the matrix has a small number of diagonals (as in the
         # case of structured matrices coming from images), the
         # dia format might be best suited for matvec products:
@@ -229,7 +233,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
         # /lobpcg/lobpcg.py#L237
         # or matlab:
        # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
-        laplacian = _set_diag(laplacian, 1)
+        laplacian = _set_diag(laplacian, 1, norm_laplacian)
 
         # Here we'll use shift-invert mode for fast eigenvalues
         # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
@@ -268,7 +272,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
         # lobpcg needs double precision floats
         laplacian = check_array(laplacian, dtype=np.float64,
                                 accept_sparse=True)
-        laplacian = _set_diag(laplacian, 1)
+        laplacian = _set_diag(laplacian, 1, norm_laplacian)
         ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
         M = ml.aspreconditioner()
         X = random_state.rand(laplacian.shape[0], n_components + 1)
@@ -292,7 +296,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
             lambdas, diffusion_map = eigh(laplacian)
             embedding = diffusion_map.T[:n_components] * dd
         else:
-            laplacian = _set_diag(laplacian, 1)
+            laplacian = _set_diag(laplacian, 1, norm_laplacian)
             # We increase the number of eigenvectors requested, as lobpcg
             # doesn't behave well in low dimension
             X = random_state.rand(laplacian.shape[0], n_components + 1)
diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py
index 846445632cfb0..d6ef8dc2e0179 100644
--- a/sklearn/manifold/tests/test_spectral_embedding.py
+++ b/sklearn/manifold/tests/test_spectral_embedding.py
@@ -3,6 +3,7 @@
 
 from scipy.sparse import csr_matrix
 from scipy.sparse import csc_matrix
+from scipy.linalg import eigh
 import numpy as np
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 
@@ -16,6 +17,8 @@
 from sklearn.metrics import normalized_mutual_info_score
 from sklearn.cluster import KMeans
 from sklearn.datasets.samples_generator import make_blobs
+from sklearn.utils.graph import graph_laplacian
+from sklearn.utils.extmath import _deterministic_vector_sign_flip
 
 
 # non centered, sparse centers to check the
@@ -192,3 +195,23 @@ def test_spectral_embedding_deterministic():
     embedding_1 = spectral_embedding(sims)
     embedding_2 = spectral_embedding(sims)
     assert_array_almost_equal(embedding_1, embedding_2)
+
+
+def test_spectral_embedding_unnormalized():
+    # Test that spectral_embedding also processes the unnormalized laplacian correctly
+    random_state = np.random.RandomState(36)
+    data = random_state.randn(10, 30)
+    sims = rbf_kernel(data)
+    n_components = 8
+    embedding_1 = spectral_embedding(sims,
+                                     norm_laplacian=False,
+                                     n_components=n_components,
+                                     drop_first=False)
+
+    # Verify using manual computation with dense eigh
+    laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
+    _, diffusion_map = eigh(laplacian)
+    embedding_2 = diffusion_map.T[:n_components] * dd
+    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T
+
+    assert_array_almost_equal(embedding_1, embedding_2)
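
Note: the sketch below is one quick way to exercise the fixed code path outside the test suite. It simply mirrors the new test_spectral_embedding_unnormalized test above; the import paths (e.g. sklearn.utils.graph.graph_laplacian, sklearn.utils.extmath._deterministic_vector_sign_flip) are assumed to be those of the scikit-learn version this patch applies to and may differ in later releases.

    # Standalone check of the unnormalized-Laplacian path fixed by this patch.
    # Mirrors test_spectral_embedding_unnormalized; import paths are assumed to
    # match the scikit-learn version this diff targets.
    import numpy as np
    from scipy.linalg import eigh
    from sklearn.manifold import spectral_embedding
    from sklearn.metrics.pairwise import rbf_kernel
    from sklearn.utils.graph import graph_laplacian
    from sklearn.utils.extmath import _deterministic_vector_sign_flip

    rng = np.random.RandomState(36)
    sims = rbf_kernel(rng.randn(10, 30))   # dense affinity (similarity) matrix
    n_components = 8

    # Fast path: spectral_embedding on the unnormalized graph Laplacian.
    emb_fast = spectral_embedding(sims, norm_laplacian=False,
                                  n_components=n_components, drop_first=False)

    # Reference: dense eigendecomposition of the unnormalized graph Laplacian,
    # recovered the same way the new test does.
    laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
    _, diffusion_map = eigh(laplacian)
    emb_ref = _deterministic_vector_sign_flip(diffusion_map.T[:n_components] * dd).T

    np.testing.assert_array_almost_equal(emb_fast, emb_ref)
    print("unnormalized spectral embedding matches dense eigh reference")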