DOC: document better similarity matrix of spectral clustering

GaelVaroquaux · GaelVaroquaux · commit 0bc70b139f9c · 2011-02-24T22:15:06.000+01:00
diff --git a/scikits/learn/cluster/spectral.py b/scikits/learn/cluster/spectral.py
@@ -15,7 +15,7 @@
 
 def spectral_embedding(adjacency, k=8, mode=None):
     """ Spectral embedding: project the sample on the k first
-        eigen vectors of the graph laplacian.
+        eigen vectors of the normalized graph Laplacian.
 
         Parameters
         -----------
@@ -123,6 +123,9 @@ def spectral_clustering(adjacency, k=8, mode=None):
         ------
         The graph should contain only one connect component,
         elsewhere the results make little sens.
+
+        This algorithm solves the normalized cut for k=2: it is a
+        normalized spectral clustering.
     """
     maps = spectral_embedding(adjacency, k=k, mode=mode)
     maps = maps[1:]
@@ -172,9 +175,21 @@ def fit(self, X, **params):
             -----------
             X: array-like or sparse matrix, shape: (p, p)
                 The adjacency matrix of the graph to embed.
+                X is an adjacency matrix of a similarity graph: its
+                entries must be positive or zero. Zero means that
+                elements have nothing in common, whereas high values mean
+                that elements are strongly similar.
 
             Notes
             ------
+            If you have an affinity matrix, such as a distance matrix,
+            for which 0 means identical elements, and high values mean
+            very dissimilar elements, it can be transformed into a
+            similarity matrix that is well suited for the algorithm by
+            applying the heat kernel::
+
+                np.exp(- X ** 2 / (2. * delta ** 2))
+
             If the pyamg package is installed, it is used. This
             greatly speeds up computation.
         """