[MRG + 1] Bug fix for unnormalized laplacian by yanlend · Pull Request #4995 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[MRG + 1] Bug fix for unnormalized laplacian #4995

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 19, 2015
Merged
7 changes: 6 additions & 1 deletion doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Enhancements

Bug fixes
.........
- Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
Laplacian matrix was incorrectly set to 1. By `Peter Fischer`_.


API changes summary
-------------------
Expand Down Expand Up @@ -261,7 +264,7 @@ Bug fixes
in the final fit. By `Manoj Kumar`_.

- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing
oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.
oob_score and X is a sparse.csc_matrix. By `Ankur Ankan`_.

- All regressors now consistently handle and warn when given ``y`` that is of
shape ``(n_samples, 1)``. By `Andreas Müller`_.
Expand Down Expand Up @@ -3772,6 +3775,8 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.

.. _Loic Esteve: https://github.com/lesteve

.. _Peter Fischer: https://github.com/yanlend

.. _Brian McFee: https://bmcfee.github.io

.. _Vighnesh Birodkar: https://github.com/vighneshbirodkar
Expand Down
18 changes: 11 additions & 7 deletions sklearn/manifold/spectral_embedding_.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _graph_is_connected(graph):
return _graph_connected_component(graph, 0).sum() == graph.shape[0]


def _set_diag(laplacian, value):
def _set_diag(laplacian, value, norm_laplacian):
"""Set the diagonal of the laplacian matrix and convert it to a
sparse format well suited for eigenvalue decomposition

Expand All @@ -88,6 +88,8 @@ def _set_diag(laplacian, value):
The graph laplacian
value : float
The value of the diagonal
norm_laplacian : bool
Whether the value of the diagonal should be changed or not

Returns
-------
Expand All @@ -99,11 +101,13 @@ def _set_diag(laplacian, value):
n_nodes = laplacian.shape[0]
# We need all entries in the diagonal to values
if not sparse.isspmatrix(laplacian):
laplacian.flat[::n_nodes + 1] = value
if norm_laplacian:
laplacian.flat[::n_nodes + 1] = value
else:
laplacian = laplacian.tocoo()
diag_idx = (laplacian.row == laplacian.col)
laplacian.data[diag_idx] = value
if norm_laplacian:
diag_idx = (laplacian.row == laplacian.col)
laplacian.data[diag_idx] = value
# If the matrix has a small number of diagonals (as in the
# case of structured matrices coming from images), the
# dia format might be best suited for matvec products:
Expand Down Expand Up @@ -229,7 +233,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
# /lobpcg/lobpcg.py#L237
# or matlab:
# http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
laplacian = _set_diag(laplacian, 1)
laplacian = _set_diag(laplacian, 1, norm_laplacian)

# Here we'll use shift-invert mode for fast eigenvalues
# (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
Expand Down Expand Up @@ -268,7 +272,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64,
accept_sparse=True)
laplacian = _set_diag(laplacian, 1)
laplacian = _set_diag(laplacian, 1, norm_laplacian)
ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
M = ml.aspreconditioner()
X = random_state.rand(laplacian.shape[0], n_components + 1)
Expand All @@ -292,7 +296,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
lambdas, diffusion_map = eigh(laplacian)
embedding = diffusion_map.T[:n_components] * dd
else:
laplacian = _set_diag(laplacian, 1)
laplacian = _set_diag(laplacian, 1, norm_laplacian)
# We increase the number of eigenvectors requested, as lobpcg
# doesn't behave well in low dimension
X = random_state.rand(laplacian.shape[0], n_components + 1)
Expand Down
23 changes: 23 additions & 0 deletions sklearn/manifold/tests/test_spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from scipy.sparse import csr_matrix
from scipy.sparse import csc_matrix
from scipy.linalg import eigh
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal

Expand All @@ -16,6 +17,8 @@
from sklearn.metrics import normalized_mutual_info_score
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs
from sklearn.utils.graph import graph_laplacian
from sklearn.utils.extmath import _deterministic_vector_sign_flip


# non centered, sparse centers to check the
Expand Down Expand Up @@ -192,3 +195,23 @@ def test_spectral_embedding_deterministic():
embedding_1 = spectral_embedding(sims)
embedding_2 = spectral_embedding(sims)
assert_array_almost_equal(embedding_1, embedding_2)


def test_spectral_embedding_unnormalized():
    # Check that spectral_embedding also processes the unnormalized
    # graph laplacian correctly (norm_laplacian=False).
    rng = np.random.RandomState(36)
    data = rng.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims,
                                     norm_laplacian=False,
                                     n_components=n_components,
                                     drop_first=False)

    # Reference embedding: dense eigendecomposition of the
    # unnormalized laplacian computed by hand.
    laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components] * dd
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)
0