From 2fd375c2c96a4561277553294456692c5f5f754d Mon Sep 17 00:00:00 2001
From: Arnaud Fouchet
Date: Fri, 13 Mar 2015 11:37:01 +0100
Subject: [PATCH 1/5] add distortion

---
 sklearn/metrics/cluster/distortion.py        | 78 +++++++++++++++++++
 .../metrics/cluster/tests/test_distortion.py | 12 +++
 2 files changed, 90 insertions(+)
 create mode 100644 sklearn/metrics/cluster/distortion.py
 create mode 100644 sklearn/metrics/cluster/tests/test_distortion.py

diff --git a/sklearn/metrics/cluster/distortion.py b/sklearn/metrics/cluster/distortion.py
new file mode 100644
index 0000000000000..335f9f2d0a42f
--- /dev/null
+++ b/sklearn/metrics/cluster/distortion.py
@@ -0,0 +1,78 @@
+from collections import defaultdict
+
+import numpy as np
+from scipy.spatial.distance import cdist
+
+
+def distortion(X, labels, distortion_meth='sqeuclidean', p=2):
+    """
+    Given data and their cluster assignment, compute the distortion D
+
+    Parameters
+    ----------
+    X : numpy array of shape (nb_data, nb_feature)
+    labels : list of int of length nb_data
+    distortion_meth : either a function (X, labels) -> float, or a string
+        naming a scipy.spatial distance, one of
+        ['euclidean', 'minkowski', 'seuclidean', 'sqeuclidean', 'chebyshev',
+         'cityblock', 'cosine', 'correlation', 'hamming', 'jaccard',
+         'braycurtis', 'mahalanobis', 'yule', 'matching', 'dice',
+         'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener',
+         'sokalsneath', 'canberra', 'wminkowski']
+    p : double
+        The p-norm to apply (for Minkowski, weighted and unweighted)
+
+    Returns
+    -------
+    distortion : float
+    """
+    if isinstance(distortion_meth, str):
+        return distortion_metrics(X, labels, distortion_meth, p)
+    else:
+        return distortion_meth(X, labels)
+
+
+def distortion_metrics(X, labels, metric='sqeuclidean', p=2):
+    """
+    Given data and their cluster assignment, compute the distortion D
+
+    D = \sum_{x \in X} distance(x, c_x) / nb_feature
+
+    with c_x the center of the cluster containing x and distance the
+    distance defined by ``metric``; the sum is normalized by the number
+    of features, as in the implementation below
+
+    Parameters
+    ----------
+    X : numpy array of shape (nb_data, nb_feature)
+    labels : list of int of length nb_data
+    metric : string naming a scipy.spatial distance, one of
+        ['euclidean', 'minkowski', 'seuclidean', 'sqeuclidean', 'chebyshev',
+         'cityblock', 'cosine', 'correlation', 'hamming', 'jaccard',
+         'braycurtis', 'mahalanobis', 'yule', 'matching', 'dice',
+         'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener',
+         'sokalsneath', 'wminkowski', 'canberra']
+    p : double
+        The p-norm to apply (for Minkowski, weighted and unweighted)
+
+    Returns
+    -------
+    distortion : float
+    """
+    if metric == 'l2':
+        # Translate to something understood by scipy
+        metric = 'euclidean'
+    elif metric in ('l1', 'manhattan'):
+        metric = 'cityblock'
+
+    # Group sample indices by cluster label
+    assignments = defaultdict(list)
+    for i, label in enumerate(labels):
+        assignments[label].append(i)
+
+    inertia = 0.
+    nb_feature = X.shape[1]
+    for points in assignments.values():
+        clu_points = X[points, :]
+        clu_center = np.mean(clu_points, axis=0).reshape(1, nb_feature)
+        inertia += np.sum(cdist(clu_points, clu_center, metric=metric, p=p))
+
+    # Normalize by the number of features (cf. the formula above)
+    return inertia / nb_feature
diff --git a/sklearn/metrics/cluster/tests/test_distortion.py b/sklearn/metrics/cluster/tests/test_distortion.py
new file mode 100644
index 0000000000000..c38db57f69220
--- /dev/null
+++ b/sklearn/metrics/cluster/tests/test_distortion.py
@@ -0,0 +1,12 @@
+import numpy as np
+
+from sklearn.utils.testing import assert_almost_equal
+
+from sklearn.metrics.cluster.distortion import distortion
+
+
+def test_distortion():
+    X = np.array([[0, 0], [2, 2],
+                  [5, 5], [6, 6]])
+    labels = [0, 0, 1, 1]
+    assert_almost_equal(distortion(X, labels), 2.5)

From 733390ae59ca4b5ececb93b1063f005833d4f17a Mon Sep 17 00:00:00 2001
From: Thierry Guillemot
Date: Wed, 29 Jun 2016 15:11:51 +0200
Subject: [PATCH 2/5] [WIP] ENH Add the OptimalNClusterSearch class [ci skip]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 sklearn/cluster/__init__.py                  |   4 +-
 sklearn/cluster/optimal_nclusters_search.py  | 385 ++++++++++++++++++
 sklearn/metrics/cluster/distortion.py        |  78 ----
 .../metrics/cluster/tests/test_distortion.py |  12 -
 4 files changed, 388 insertions(+), 91 deletions(-)
 create mode 100644 sklearn/cluster/optimal_nclusters_search.py
 delete mode 100644 sklearn/metrics/cluster/distortion.py
 delete mode 100644 sklearn/metrics/cluster/tests/test_distortion.py

diff --git a/sklearn/cluster/__init__.py b/sklearn/cluster/__init__.py
index c9afcd98f23ce..c5c51e52e6916 100644
--- a/sklearn/cluster/__init__.py
+++ b/sklearn/cluster/__init__.py
@@ -13,6 +13,7 @@
 from .dbscan_ import dbscan, DBSCAN
 from .bicluster import SpectralBiclustering, SpectralCoclustering
 from .birch import Birch
+from .optimal_nclusters_search import OptimalNClusterSearch

 __all__ = ['AffinityPropagation',
            'AgglomerativeClustering',
@@ -33,4 +34,5 @@
            'spectral_clustering',
            'ward_tree',
            'SpectralBiclustering',
-           'SpectralCoclustering']
+           'SpectralCoclustering',
+           'OptimalNClusterSearch']

diff --git a/sklearn/cluster/optimal_nclusters_search.py b/sklearn/cluster/optimal_nclusters_search.py
new file mode 100644
index 0000000000000..91a3745b8df16
--- /dev/null
+++ b/sklearn/cluster/optimal_nclusters_search.py
@@ -0,0 +1,385 @@
+"""Algorithm computing the optimal value of n_clusters."""
+
+# Authors: Thierry Guillemot
+#          Arnaud Fouchet
+
+import warnings
+import numpy as np
+
+from abc import ABCMeta, abstractmethod
+from functools import partial
+from collections import defaultdict
+
+from ..base import clone, BaseEstimator
+from ..mixture.base import BaseMixture
+from ..externals import six
+from ..externals.joblib import Parallel, delayed
+from
..model_selection import ParameterGrid +from ..utils import check_array, check_random_state +from ..utils.fixes import rankdata +from ..utils.metaestimators import if_delegate_has_method +from ..utils.testing import ignore_warnings +from ..utils.validation import check_is_fitted + + +class _NClusterSearchBase(six.with_metaclass(ABCMeta, object)): + def __init__(self, estimator, parameters, n_jobs=1, refit=True, + pre_dispatch='2*n_jobs', verbose=0): + self.estimator = estimator + self.parameters = parameters + self.n_jobs = n_jobs + self.refit = refit + self.pre_dispatch = pre_dispatch + self.verbose = verbose + + def _initialized(self, X, y): + pass + + def _compute_values(self, n_clusters_list): + return np.array(n_clusters_list) + + def _parameter_grid(self, X, y): + # Determinate the name of the property controling then number of clusters + if isinstance(self.estimator, BaseMixture): + # General case + property_name = 'n_components' + elif isinstance(self.estimator, BaseEstimator): + # Mixture case + property_name = 'n_clusters' + else: + raise ValueError("The estimator has to be from the type ...") + + # Compute n_clusters parameters + parameters = self.parameters.copy() + n_clusters_list = parameters.pop(property_name) + self.n_clusters_values, self._index = np.unique( + self._compute_values(n_clusters_list), + return_index=True) + self._index = self._index < len(n_clusters_list) + + for n_clusters in self.n_clusters_values: + for params in ParameterGrid(parameters): + params[property_name] = n_clusters + yield params + + @abstractmethod + def _estimator_fit(self, estimator, X, y, parameters): + pass + + @abstractmethod + def _compute_results(self, X, out): + pass + + @if_delegate_has_method(delegate='estimator') + def predict(self, X): + """Call predict on the estimator with the best found parameters. + + Only available if ``refit=True`` and the underlying estimator supports + ``predict``. + + Parameters + ----------- + X : indexable, length n_samples + Must fulfill the input assumptions of the + underlying estimator. + + """ + check_is_fitted(self, 'best_estimator_') + return self.best_estimator_.predict(X) + + @if_delegate_has_method(delegate='estimator') + def predict_proba(self, X): + """Call predict_proba on the estimator with the best found parameters. + + Only available if ``refit=True`` and the underlying estimator supports + ``predict_proba``. + + Parameters + ----------- + X : indexable, length n_samples + Must fulfill the input assumptions of the + underlying estimator. + + """ + check_is_fitted(self, 'best_estimator_') + return self.best_estimator_.predict_proba(X) + + def fit(self, X, y): + self._initialized(X, y) + + base_estimator = clone(self.estimator) + + out = Parallel( + n_jobs=self.n_jobs, verbose=self.verbose, + pre_dispatch=self.pre_dispatch + )(delayed(self._estimator_fit)(clone(base_estimator), X, y, parameters) + for parameters in self._parameter_grid(X, y)) + + self.results_ = self._compute_results(X, out) + ranks = np.asarray(rankdata(-self.results_['score'], method='min'), + dtype=np.int32) + self.best_index_ = np.flatnonzero(ranks == 1)[0] + best_parameters = self.results_['params'][self.best_index_] + + self.results_['rank_score'] = ranks + + # Use one np.MaskedArray and mask all the places where the param is not + # applicable for that candidate. 
Use defaultdict as each candidate may + # not contain all the params + param_results = defaultdict(partial(np.ma.masked_all, + (len(ranks),), dtype=object)) + for cand_i, params in enumerate(self.results_['params']): + for name, value in params.items(): + # An all masked empty array gets created for the key + # `"param_%s" % name` at the first occurence of `name`. + # Setting the value at an index also unmasks that index + param_results["param_%s" % name][cand_i] = value + + self.results_.update(param_results) + + if self.refit: + # fit the best estimator using the entire dataset + # clone first to work around broken estimators + best_estimator = clone(base_estimator).set_params( + **best_parameters).fit(X, y) + self.best_estimator_ = best_estimator + + return self + + @property + def best_params_(self): + check_is_fitted(self, 'results_') + return self.results_['params'][self.best_index_] + + @property + def best_score_(self): + check_is_fitted(self, 'results_') + return self.results_['score'][self.best_index_] + + +# Search for unsupervised metrics +class UnsupervisedMetricSearch(_NClusterSearchBase): + def __init__(self, estimator, parameters, metric): + super(UnsupervisedMetricSearch, self).__init__( + estimator, parameters) + self.metric = metric + + def _estimator_fit(self, estimator, X, y, parameters): + estimator.set_params(**parameters) + try: + score = self.metric(X, estimator.fit_predict(X)) + except ValueError: + # TODO Add a warning for false values + warnings.warn('Put a warning.') + score = np.nan + + return score, parameters + + def _compute_results(self, X, out): + scores, parameters = zip(*out) + return { + 'score': np.array(scores), + 'params': parameters, + } + + +# Scorer for supervised metrics +class StabilitySearch(_NClusterSearchBase): + def __init__(self, estimator, parameters, metric, n_draws=10, + p_samples=.8, random_state=None): + super(StabilitySearch, self).__init__( + estimator, parameters) + self.metric = metric + self.n_draws = n_draws + self.p_samples = p_samples + self.random_state = random_state + + def _initialized(self, X, y): + n_samples, _ = X.shape + rng = check_random_state(self.random_state) + self.data_ = ( + rng.uniform(size=(self.n_draws, 2 * n_samples)) < self.p_samples) + + def _estimator_fit(self, estimator, X, y, parameters): + estimator.set_params(**parameters) + draw_scores = np.empty(self.n_draws) + for l, d in enumerate(self.data_): + p1, p2 = np.split(d, 2) + + labels1 = estimator.fit_predict(X[p1]) + labels2 = estimator.fit_predict(X[p2]) + + draw_scores[l] = self.metric(labels1[p2[p1]], labels2[p1[p2]]) + score = draw_scores.mean() + return score, parameters + + def _compute_results(self, X, out): + scores, parameters = zip(*out) + return { + 'score': np.array(scores), + 'params': parameters, + } + + +# Scorer for distortion jump +class DistortionJumpSearch(_NClusterSearchBase): + def __init__(self, estimator, parameters): + super(DistortionJumpSearch, self).__init__( + estimator, parameters) + + def _compute_values(self, n_clusters_list): + return np.hstack((n_clusters_list, np.array(n_clusters_list) + 1)) + + def _estimator_fit(self, estimator, X, y, parameters): + _, n_features = X.shape + estimator.set_params(**parameters) + distortion = (np.array(estimator.fit(X).score(X)) / + n_features) ** (-n_features / 2) + return distortion, parameters + + def _compute_results(self, X, out): + distortion, parameters = zip(*out) + distortion = np.array(distortion).reshape( + len(self.n_clusters_values), -1) + parameters = 
np.array(parameters).reshape(distortion.shape) + + return { + 'score': (distortion[self._index] - + distortion[1:][self._index[:-1]]), + 'params': parameters[self._index].ravel(), + } + + +# Scorer for Pham +class PhamSearch(_NClusterSearchBase): + def __init__(self, estimator, parameters): + super(PhamSearch, self).__init__( + estimator, parameters) + + def _compute_values(self, n_clusters_list): + return np.hstack((n_clusters_list, np.array(n_clusters_list) - 1)) + + def _estimator_fit(self, estimator, X, y, parameters): + estimator.set_params(**parameters) + if parameters['n_clusters'] == 0: + return np.nan, parameters + return estimator.fit(X).score(X), parameters + + def _compute_results(self, X, out): + _, n_features = X.shape + + scores, parameters = zip(*out) + scores = np.array(scores).reshape(len(self.n_clusters_values), -1) + parameters = np.array(parameters).reshape(scores.shape) + + weights = 1. - np.exp((self.n_clusters_values[self._index] - 2) * + np.log(5. / 6.) + np.log(.75) - + np.log(n_features)) + + with ignore_warnings(category=RuntimeWarning): + scores = np.where( + scores[self._index] != 0., scores[self._index] / + (weights[:, np.newaxis] * scores[:-1][self._index[1:]]), 1.) + + if self.n_clusters_values[0] == 0: + scores[0, :] = 1. + + return { + 'score': -scores.ravel(), + 'params': parameters[self._index].ravel(), + } + + +# Scorer for Gap +class GapSearch(_NClusterSearchBase): + def __init__(self, estimator, parameters, n_draws=10, n_jobs=1, + random_state=None): + super(GapSearch, self).__init__(estimator, parameters, n_jobs) + self.n_draws = n_draws + self.random_state = random_state + + def _initialized(self, X, y): + n_samples, n_features = X.shape + rng = check_random_state(self.random_state) + + # Compute the random data once for all + bb_min, bb_max = np.min(X, 0), np.max(X, 0) + self._data = (rng.uniform(size=(self.n_draws, n_samples, n_features)) * + (bb_max - bb_min) + bb_min) + + def _compute_values(self, n_clusters_list): + return np.hstack((n_clusters_list, np.array(n_clusters_list) + 1)) + + def _estimator_fit(self, estimator, X, y, parameters): + estimator.set_params(**parameters) + + estimated_log_inertia = np.log(-estimator.fit(X).score(X)) + inertia_n_draws = np.empty(self.n_draws) + for t, Xt in enumerate(self._data): + inertia_n_draws[t] = -estimator.fit(Xt).score(Xt) + inertia_n_draws = np.log(inertia_n_draws) + + expected_log_inertia = np.mean(inertia_n_draws) + gap = expected_log_inertia - estimated_log_inertia + safety = (np.std(inertia_n_draws) * + np.sqrt(1. + 1. 
/ self.n_draws))
+
+        return gap, safety, parameters
+
+    def _compute_results(self, X, out):
+        gap, safety, parameters = zip(*out)
+
+        gap = np.array(gap).reshape(len(self.n_clusters_values), -1)
+        safety = np.array(safety).reshape(gap.shape)
+
+        scores = (gap[self._index] - gap[1:][self._index[:-1]] +
+                  safety[1:][self._index[:-1]])
+
+        return {
+            'gap': gap[self._index].ravel(),
+            'safety': safety[self._index].ravel(),
+            'score': scores.ravel(),
+            'params': (
+                np.array(parameters).reshape(gap.shape)[self._index].ravel()),
+        }
+
+
+class OptimalNClusterSearch():
+    def __init__(self, estimator, parameters, fitting_process='auto',
+                 **kwargs):
+        self.estimator = estimator
+        self.parameters = parameters.copy()
+        self.fitting_process = fitting_process
+        self.kwargs = kwargs
+
+    def _check_initial_parameters(self, X):
+        n_samples, _ = X.shape
+
+        X = check_array(X, dtype=[np.float64, np.float32])
+        # if self.n_clusters_range.max() > n_samples:
+        #     raise ValueError("Put a message")
+
+    def _select_fitting_method(self):
+        fitting_method = {
+            'auto': PhamSearch,
+            'pham': PhamSearch,
+            'gap': GapSearch,
+            'distortion_jump': DistortionJumpSearch,
+            'stability': StabilitySearch,
+            'unsupervised': UnsupervisedMetricSearch
+        }
+
+        self.scorer_ = fitting_method[self.fitting_process](
+            estimator=self.estimator, parameters=self.parameters,
+            **self.kwargs)
+
+    def fit(self, X, y=None):
+        self._check_initial_parameters(X)
+
+        self._select_fitting_method()
+        self.scorer_.fit(X, y)
+
+        self.results_ = self.scorer_.results_
+        self.best_estimator_ = self.scorer_.best_estimator_
+
+        return self
diff --git a/sklearn/metrics/cluster/distortion.py b/sklearn/metrics/cluster/distortion.py
deleted file mode 100644
index 335f9f2d0a42f..0000000000000
--- a/sklearn/metrics/cluster/distortion.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from collections import defaultdict
-
-import numpy as np
-from scipy.spatial.distance import cdist
-
-
-def distortion(X, labels, distortion_meth='sqeuclidean', p=2):
-    """
-    Given data and their cluster assignment, compute the distortion D
-
-    Parameters
-    ----------
-    X : numpy array of shape (nb_data, nb_feature)
-    labels : list of int of length nb_data
-    distortion_meth : either a function (X, labels) -> float, or a string
-        naming a scipy.spatial distance, one of
-        ['euclidean', 'minkowski', 'seuclidean', 'sqeuclidean', 'chebyshev',
-         'cityblock', 'cosine', 'correlation', 'hamming', 'jaccard',
-         'braycurtis', 'mahalanobis', 'yule', 'matching', 'dice',
-         'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener',
-         'sokalsneath', 'canberra', 'wminkowski']
-    p : double
-        The p-norm to apply (for Minkowski, weighted and unweighted)
-
-    Returns
-    -------
-    distortion : float
-    """
-    if isinstance(distortion_meth, str):
-        return distortion_metrics(X, labels, distortion_meth, p)
-    else:
-        return distortion_meth(X, labels)
-
-
-def distortion_metrics(X, labels, metric='sqeuclidean', p=2):
-    """
-    Given data and their cluster assignment, compute the distortion D
-
-    D = \sum_{x \in X} distance(x, c_x) / nb_feature
-
-    with c_x the center of the cluster containing x and distance the
-    distance defined by ``metric``; the sum is normalized by the number
-    of features, as in the implementation below
-
-    Parameters
-    ----------
-    X : numpy array of shape (nb_data, nb_feature)
-    labels : list of int of length nb_data
-    metric : string naming a scipy.spatial distance, one of
-        ['euclidean', 'minkowski', 'seuclidean', 'sqeuclidean', 'chebyshev',
-         'cityblock', 'cosine', 'correlation', 'hamming', 'jaccard',
-         'braycurtis', 'mahalanobis', 'yule', 'matching', 'dice',
-         'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener',
-         'sokalsneath', 'wminkowski', 'canberra']
-    p : double
-        The p-norm to apply (for Minkowski, weighted and unweighted)
-
-    Returns
-    -------
-    distortion : float
-    """
-    if metric == 'l2':
-        # Translate to something understood by scipy
-        metric = 'euclidean'
-    elif metric in ('l1', 'manhattan'):
-        metric = 'cityblock'
-
-    # Group sample indices by cluster label
-    assignments = defaultdict(list)
-    for i, label in enumerate(labels):
-        assignments[label].append(i)
-
-    inertia = 0.
-    nb_feature = X.shape[1]
-    for points in assignments.values():
-        clu_points = X[points, :]
-        clu_center = np.mean(clu_points, axis=0).reshape(1, nb_feature)
-        inertia += np.sum(cdist(clu_points, clu_center, metric=metric, p=p))
-
-    # Normalize by the number of features (cf. the formula above)
-    return inertia / nb_feature
diff --git a/sklearn/metrics/cluster/tests/test_distortion.py b/sklearn/metrics/cluster/tests/test_distortion.py
deleted file mode 100644
index c38db57f69220..0000000000000
--- a/sklearn/metrics/cluster/tests/test_distortion.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import numpy as np
-
-from sklearn.utils.testing import assert_almost_equal
-
-from sklearn.metrics.cluster.distortion import distortion
-
-
-def test_distortion():
-    X = np.array([[0, 0], [2, 2],
-                  [5, 5], [6, 6]])
-    labels = [0, 0, 1, 1]
-    assert_almost_equal(distortion(X, labels), 2.5)

From 227b30d930a9ecea0438a399c30c8d7862851b55 Mon Sep 17 00:00:00 2001
From: Thierry Guillemot
Date: Wed, 29 Jun 2016 18:06:21 +0200
Subject: [PATCH 3/5] Add plot_choose_n_optimal example [ci skip]

---
 examples/cluster/plot_optimal_n_clusters.py | 73 +++++++++++++++++++++
 sklearn/cluster/optimal_nclusters_search.py | 11 +++-
 2 files changed, 82 insertions(+), 2 deletions(-)
 create mode 100644 examples/cluster/plot_optimal_n_clusters.py

diff --git a/examples/cluster/plot_optimal_n_clusters.py b/examples/cluster/plot_optimal_n_clusters.py
new file mode 100644
index 0000000000000..30805d4ecb14e
--- /dev/null
+++ b/examples/cluster/plot_optimal_n_clusters.py
@@ -0,0 +1,73 @@
+"""Plot the clusterings selected by several n_clusters selection criteria."""
+
+# Authors: Thierry Guillemot
+
+import time
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn.cluster import KMeans, OptimalNClusterSearch
+from sklearn.datasets import make_blobs
+from sklearn.metrics import calinski_harabaz_score, fowlkes_mallows_score
+from sklearn.metrics import silhouette_score
+from sklearn.utils import check_random_state
+
+
+n_samples, n_features, random_state = 1000, 2, 1
+parameters = {'n_clusters': np.arange(1, 7)}
+
+rng = check_random_state(random_state)
+datasets = [
+    ('3 clusters', make_blobs(n_samples=n_samples, n_features=2,
+                              random_state=random_state, centers=3)),
+    ('5 clusters', make_blobs(n_samples=n_samples, n_features=2,
+                              random_state=random_state, centers=5)),
+    ('random', (rng.rand(n_samples, n_features),
+                np.zeros(n_samples, dtype=int))),
+]
+
+estimator = KMeans(n_init=10, random_state=0)
+searchers = [
+    ('Silhouette', OptimalNClusterSearch(
+        estimator=estimator, parameters=parameters,
+        fitting_process='unsupervised', metric=silhouette_score)),
+    ('Calinski', OptimalNClusterSearch(
+        estimator=estimator, parameters=parameters,
+        fitting_process='unsupervised', metric=calinski_harabaz_score)),
+    ('Stability', OptimalNClusterSearch(
+        estimator=estimator, parameters=parameters, random_state=0,
+        fitting_process='stability',
metric=fowlkes_mallows_score)), + ('Distortion jump', OptimalNClusterSearch( + estimator=estimator, parameters=parameters, + fitting_process='distortion_jump')), + ('Gap', OptimalNClusterSearch( + estimator=estimator, parameters=parameters, random_state=0, + fitting_process='gap')), + ('Pham', OptimalNClusterSearch( + estimator=estimator, parameters=parameters, fitting_process='pham')), +] + +color = 'bgrcmyk' +plt.figure(figsize=(13, 9.5)) +plt.subplots_adjust(left=.001, right=.999, bottom=.001, top=.96, wspace=.05, + hspace=.01) +for k, (data_name, data) in enumerate(datasets): + X, _ = data + for l, (search_name, search) in enumerate(searchers): + t0 = time.time() + y = search.fit(X).predict(X) + t1 = time.time() + + colors = np.array([color[k] for k in y]) + plt.subplot(len(datasets), len(searchers), + len(searchers) * k + l + 1) + if k == 0: + plt.title(search_name, size=18) + plt.scatter(X[:, 0], X[:, 1], color=colors, alpha=.6) + plt.xticks(()) + plt.yticks(()) + plt.axis('equal') + plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'), + transform=plt.gca().transAxes, size=15, + horizontalalignment='right') +plt.show() diff --git a/sklearn/cluster/optimal_nclusters_search.py b/sklearn/cluster/optimal_nclusters_search.py index 91a3745b8df16..8daf5f476bbbb 100644 --- a/sklearn/cluster/optimal_nclusters_search.py +++ b/sklearn/cluster/optimal_nclusters_search.py @@ -202,6 +202,10 @@ def _initialized(self, X, y): def _estimator_fit(self, estimator, X, y, parameters): estimator.set_params(**parameters) + if parameters['n_clusters'] == 1: + warnings.warn('Put a warning.') + return np.nan, parameters + draw_scores = np.empty(self.n_draws) for l, d in enumerate(self.data_): p1, p2 = np.split(d, 2) @@ -284,6 +288,9 @@ def _compute_results(self, X, out): if self.n_clusters_values[0] == 0: scores[0, :] = 1. + # XXX change that to not modify the score + scores[scores > 0.85] = 1. + return { 'score': -scores.ravel(), 'params': parameters[self._index].ravel(), @@ -333,7 +340,7 @@ def _compute_results(self, X, out): safety = np.array(safety).reshape(gap.shape) scores = (gap[self._index] - gap[1:][self._index[:-1]] + - safety[1:][self._index[:-1]]) + safety[1:][self._index[:-1]]) >= 0 return { 'gap': gap[self._index].ravel(), @@ -382,4 +389,4 @@ def fit(self, X, y=None): self.results_ = self.scorer_.results_ self.best_estimator_ = self.scorer_.best_estimator_ - return self + return self.scorer_ From a0926db6462f15c9bef11ce1fd44093f09802738 Mon Sep 17 00:00:00 2001 From: Thierry Guillemot Date: Mon, 9 Jan 2017 10:36:48 +0100 Subject: [PATCH 4/5] Modify little stuff for testing. 
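
Note: SpectralClustering does not implement ``predict``, so the updated
example reads the training-set assignments from the fitted estimator
instead of calling predict. A minimal sketch of the two access patterns
(assuming the searcher exposes best_estimator_ after fit, as above):

    # KMeans can label new points after the search:
    y = search.fit(X).predict(X)
    # SpectralClustering only labels the data it was fitted on:
    y = search.fit(X).best_estimator_.labels_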
--- examples/cluster/plot_optimal_n_clusters.py | 26 +++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/examples/cluster/plot_optimal_n_clusters.py b/examples/cluster/plot_optimal_n_clusters.py index 30805d4ecb14e..eb9f494d5ac25 100644 --- a/examples/cluster/plot_optimal_n_clusters.py +++ b/examples/cluster/plot_optimal_n_clusters.py @@ -6,8 +6,8 @@ import numpy as np import matplotlib.pyplot as plt -from sklearn.cluster import KMeans, OptimalNClusterSearch -from sklearn.datasets import make_blobs +from sklearn.cluster import KMeans, SpectralClustering, OptimalNClusterSearch +from sklearn.datasets import make_blobs, make_circles, make_moons from sklearn.metrics import calinski_harabaz_score, fowlkes_mallows_score from sklearn.metrics import silhouette_score from sklearn.utils import check_random_state @@ -24,9 +24,11 @@ random_state=random_state, centers=5)), ('random', (rng.rand(n_samples, n_features), np.zeros(n_samples, dtype=int))), + ('circle', make_circles(n_samples=n_samples, factor=.5, noise=.05, shuffle=True, random_state=0)), + ('moon', make_moons(n_samples=n_samples, noise=.05, shuffle=True, random_state=0)), ] -estimator = KMeans(n_init=10, random_state=0) +estimator = SpectralClustering(n_init=10, random_state=0) searchers = [ ('Silhouette', OptimalNClusterSearch( estimator=estimator, parameters=parameters, @@ -37,14 +39,14 @@ ('Stability', OptimalNClusterSearch( estimator=estimator, parameters=parameters, random_state=0, fitting_process='stability', metric=fowlkes_mallows_score)), - ('Distortion jump', OptimalNClusterSearch( - estimator=estimator, parameters=parameters, - fitting_process='distortion_jump')), - ('Gap', OptimalNClusterSearch( - estimator=estimator, parameters=parameters, random_state=0, - fitting_process='gap')), - ('Pham', OptimalNClusterSearch( - estimator=estimator, parameters=parameters, fitting_process='pham')), + # ('Distortion jump', OptimalNClusterSearch( + # estimator=estimator, parameters=parameters, + # fitting_process='distortion_jump')), + # ('Gap', OptimalNClusterSearch( + # estimator=estimator, parameters=parameters, random_state=0, + # fitting_process='gap')), + # ('Pham', OptimalNClusterSearch( + # estimator=estimator, parameters=parameters, fitting_process='pham')), ] color = 'bgrcmyk' @@ -55,7 +57,7 @@ X, _ = data for l, (search_name, search) in enumerate(searchers): t0 = time.time() - y = search.fit(X).predict(X) + y = search.fit(X).best_estimator_.labels_ t1 = time.time() colors = np.array([color[k] for k in y]) From 1868d751067a3fe319c8cce91971caf552c5d382 Mon Sep 17 00:00:00 2001 From: Thierry Guillemot Date: Mon, 9 Jan 2017 18:42:53 +0100 Subject: [PATCH 5/5] [ci-skip] Add doc, unsupervised metric for any clustersearch. 
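
Note: with this patch every searcher accepts an optional ``metric``
callable with signature metric(X, labels); when it is None, the
estimator's own score() method is used instead. A minimal sketch of the
new keyword (mirroring the updated example script below):

    from sklearn.metrics import silhouette_score
    search = OptimalNClusterSearch(
        estimator=estimator, parameters=parameters,
        fitting_process='pham', metric=silhouette_score)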
--- examples/cluster/plot_optimal_n_clusters.py | 38 +- sklearn/cluster/optimal_nclusters_search.py | 618 ++++++++++++++++++-- 2 files changed, 584 insertions(+), 72 deletions(-) diff --git a/examples/cluster/plot_optimal_n_clusters.py b/examples/cluster/plot_optimal_n_clusters.py index eb9f494d5ac25..fc033cfc8a4ae 100644 --- a/examples/cluster/plot_optimal_n_clusters.py +++ b/examples/cluster/plot_optimal_n_clusters.py @@ -14,7 +14,7 @@ n_samples, n_features, random_state = 1000, 2, 1 -parameters = {'n_clusters': np.arange(1, 7)} +parameters = {'n_clusters': np.arange(1, 8)} rng = check_random_state(random_state) datasets = [ @@ -24,29 +24,35 @@ random_state=random_state, centers=5)), ('random', (rng.rand(n_samples, n_features), np.zeros(n_samples, dtype=int))), - ('circle', make_circles(n_samples=n_samples, factor=.5, noise=.05, shuffle=True, random_state=0)), - ('moon', make_moons(n_samples=n_samples, noise=.05, shuffle=True, random_state=0)), + ('circle', make_circles(n_samples=n_samples, factor=.5, noise=.05, + shuffle=True, random_state=random_state)), + ('moon', make_moons(n_samples=n_samples, noise=.05, + shuffle=True, random_state=random_state)), ] -estimator = SpectralClustering(n_init=10, random_state=0) +estimator = SpectralClustering(eigen_solver='arpack', + affinity="nearest_neighbors", + n_init=10, random_state=0) +# estimator = KMeans(n_init=10, random_state=0) searchers = [ - ('Silhouette', OptimalNClusterSearch( - estimator=estimator, parameters=parameters, - fitting_process='unsupervised', metric=silhouette_score)), - ('Calinski', OptimalNClusterSearch( - estimator=estimator, parameters=parameters, - fitting_process='unsupervised', metric=calinski_harabaz_score)), - ('Stability', OptimalNClusterSearch( - estimator=estimator, parameters=parameters, random_state=0, - fitting_process='stability', metric=fowlkes_mallows_score)), + # ('Silhouette', OptimalNClusterSearch( + # estimator=estimator, parameters=parameters, + # fitting_process='unsupervised', metric=silhouette_score)), + # ('Calinski', OptimalNClusterSearch( + # estimator=estimator, parameters=parameters, + # fitting_process='unsupervised', metric=calinski_harabaz_score)), + # ('Stability', OptimalNClusterSearch( + # estimator=estimator, parameters=parameters, random_state=0, + # fitting_process='stability', metric=fowlkes_mallows_score)), # ('Distortion jump', OptimalNClusterSearch( # estimator=estimator, parameters=parameters, # fitting_process='distortion_jump')), # ('Gap', OptimalNClusterSearch( # estimator=estimator, parameters=parameters, random_state=0, - # fitting_process='gap')), - # ('Pham', OptimalNClusterSearch( - # estimator=estimator, parameters=parameters, fitting_process='pham')), + # fitting_process='gap', metric=silhouette_score)), + ('Pham', OptimalNClusterSearch( + estimator=estimator, parameters=parameters, fitting_process='pham', + metric=silhouette_score)), ] color = 'bgrcmyk' diff --git a/sklearn/cluster/optimal_nclusters_search.py b/sklearn/cluster/optimal_nclusters_search.py index 8daf5f476bbbb..1f3ccdc5f17a1 100644 --- a/sklearn/cluster/optimal_nclusters_search.py +++ b/sklearn/cluster/optimal_nclusters_search.py @@ -23,12 +23,16 @@ class _NClusterSearchBase(six.with_metaclass(ABCMeta, object)): - def __init__(self, estimator, parameters, n_jobs=1, refit=True, + """Base class for cluster searchers. + + This abstract class specifies an interface for all cluster searchers and + provides some basic common methods. 
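+
+    Subclasses implement ``_estimator_fit`` (fit one candidate and return
+    its raw values) and ``_compute_results`` (turn those values into
+    scores); ``fit`` then evaluates the whole parameter grid in parallel
+    and ranks the candidates by score.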
+ """ + def __init__(self, estimator, parameters, n_jobs=1, pre_dispatch='2*n_jobs', verbose=0): self.estimator = estimator self.parameters = parameters self.n_jobs = n_jobs - self.refit = refit self.pre_dispatch = pre_dispatch self.verbose = verbose @@ -36,38 +40,64 @@ def _initialized(self, X, y): pass def _compute_values(self, n_clusters_list): + """Define all n_clusters values needed to compute a score.""" return np.array(n_clusters_list) def _parameter_grid(self, X, y): - # Determinate the name of the property controling then number of clusters + """Define the grid used by the cluster searcher.""" + # Define the property name controling the number of clusters if isinstance(self.estimator, BaseMixture): - # General case + # Case for clustering estimator property_name = 'n_components' elif isinstance(self.estimator, BaseEstimator): - # Mixture case + # Case for mixture estimator property_name = 'n_clusters' else: raise ValueError("The estimator has to be from the type ...") - # Compute n_clusters parameters + # Extract number of cluster parameter from the parameter list parameters = self.parameters.copy() n_clusters_list = parameters.pop(property_name) + # Avoid multiple computation of sets of same parameters self.n_clusters_values, self._index = np.unique( self._compute_values(n_clusters_list), return_index=True) self._index = self._index < len(n_clusters_list) + # Grid definition for n_clusters in self.n_clusters_values: for params in ParameterGrid(parameters): params[property_name] = n_clusters yield params + def _compute_score(self, estimator, X): + """Compute the score of the estimator according to self.metric.""" + # XXX Not good to deal with bad values to modify + try: + if self.metric is not None: + # Add a warning for n_cluster = 1 + score = self.metric(X, estimator.fit_predict(X)) + elif hasattr(estimator, 'score'): + score = estimator.fit(X).score(X) + # else: + # raise ValueError( + # "The class %s does not have a score method. Change the " + # "estimator type or give an unsupervised metric to the " + # "``metric`` attribute.") + except ValueError: + # TODO Add a warning for false values + # warnings.warn('Put a warning.') + score = np.nan + return score + @abstractmethod def _estimator_fit(self, estimator, X, y, parameters): + """Fit the estimator and return values used to define a score.""" pass @abstractmethod def _compute_results(self, X, out): + """Compute result from the values computed by _estimator_fit.""" pass @if_delegate_has_method(delegate='estimator') @@ -105,10 +135,25 @@ def predict_proba(self, X): return self.best_estimator_.predict_proba(X) def fit(self, X, y): + """Run fit with all sets of parameters. + + Parameters + ---------- + + X : array-like, shape = [n_samples, n_features] + Training vector, where n_samples is the number of samples and + n_features is the number of features. + + y : array-like, shape = [n_samples] or [n_samples, n_output], optional + Target relative to X for classification or regression; + None for unsupervised learning. + """ self._initialized(X, y) base_estimator = clone(self.estimator) + # Fit the estimator for each parameter and compute output used + # by _compute_results to define a score out = Parallel( n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=self.pre_dispatch @@ -116,11 +161,11 @@ def fit(self, X, y): for parameters in self._parameter_grid(X, y)) self.results_ = self._compute_results(X, out) + + # XXX Change that ?? 
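+        # rankdata on the negated scores gives rank 1 to the highest score;
+        # with method='min', tied scores all share the lowest rank.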
ranks = np.asarray(rankdata(-self.results_['score'], method='min'), dtype=np.int32) self.best_index_ = np.flatnonzero(ranks == 1)[0] - best_parameters = self.results_['params'][self.best_index_] - self.results_['rank_score'] = ranks # Use one np.MaskedArray and mask all the places where the param is not @@ -136,46 +181,102 @@ def fit(self, X, y): param_results["param_%s" % name][cand_i] = value self.results_.update(param_results) - - if self.refit: - # fit the best estimator using the entire dataset - # clone first to work around broken estimators - best_estimator = clone(base_estimator).set_params( - **best_parameters).fit(X, y) - self.best_estimator_ = best_estimator + self.best_params_ = self.results_['params'][self.best_index_] + self.best_score_ = self.results_['score'][self.best_index_] + + # fit the best estimator + best_estimator = clone(base_estimator).set_params( + **self.best_params_) + if y is not None: + best_estimator.fit(X, y) + else: + best_estimator.fit(X) + self.best_estimator_ = best_estimator return self - @property - def best_params_(self): - check_is_fitted(self, 'results_') - return self.results_['params'][self.best_index_] - - @property - def best_score_(self): - check_is_fitted(self, 'results_') - return self.results_['score'][self.best_index_] - # Search for unsupervised metrics class UnsupervisedMetricSearch(_NClusterSearchBase): - def __init__(self, estimator, parameters, metric): + """Unsupervised metric search on hyper parameters. + + UnsupervisedMetricSearch implements an exhaustive search over specified + parameter values for a clustering estimator using an unsupervised metric + to define the best result. + + Parameters + ---------- + estimator : estimator object. + This is assumed to implement the scikit-learn estimator interface. + Either estimator needs to provide a ``score`` function, + or ``metric`` must be passed. + + parameters : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + metric : callable or None, defaults to None + An unsupervised metric function with signature + ``metric(X, y, **kwargs)``. + If ``None``, the ``score`` method of the estimator is used. + + n_jobs : int, default=1 + Number of jobs to run in parallel. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + verbose : integer + Controls the verbosity: the higher, the more messages. + + Attributes + ---------- + results_ : list of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + + best_estimator_ : estimator + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. Not available if refit=False. 
+ + best_score_ : float + Score of best_estimator on the left out data. + + best_params_ : dict + Parameter setting that gave the best results on the hold out data. + """ + def __init__(self, estimator, parameters, metric=None, n_jobs=-1, + pre_dispatch='2*n_jobs', verbose=0): super(UnsupervisedMetricSearch, self).__init__( - estimator, parameters) + estimator, parameters, n_jobs, pre_dispatch, verbose) self.metric = metric def _estimator_fit(self, estimator, X, y, parameters): + """Fit the estimator and return values used to define a score.""" estimator.set_params(**parameters) - try: - score = self.metric(X, estimator.fit_predict(X)) - except ValueError: - # TODO Add a warning for false values - warnings.warn('Put a warning.') - score = np.nan - + score = self._compute_score(estimator, X) return score, parameters def _compute_results(self, X, out): + """Compute result from the values computed by _estimator_fit.""" scores, parameters = zip(*out) return { 'score': np.array(scores), @@ -185,25 +286,100 @@ def _compute_results(self, X, out): # Scorer for supervised metrics class StabilitySearch(_NClusterSearchBase): - def __init__(self, estimator, parameters, metric, n_draws=10, - p_samples=.8, random_state=None): + """Unsupervised metric search on hyper parameters. + + StabilitySearch implements an exhaustive search over specified + parameter values for a clustering estimator using a stability search + process to define the best result. + + Parameters + ---------- + estimator : estimator object. + This is assumed to implement the scikit-learn estimator interface. + Either estimator needs to provide a ``score`` function, + or ``metric`` must be passed. + + parameters : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + metric : callable + An unsupervised metric function with signature + ``metric(X, y, **kwargs)``. + If ``None``, the ``score`` method of the estimator is used. + + n_draws : int, defaults to 10 + Number of draws used to define the score. + + p_samples: float, defaults to .8 + Probability to draw point to define the sample sets. + + n_jobs : int, defaults to 1 + Number of jobs to run in parallel. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + verbose : integer + Controls the verbosity: the higher, the more messages. + + Attributes + ---------- + results_ : list of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + + best_estimator_ : estimator + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. Not available if refit=False. 
+ + best_score_ : float + Score of best_estimator on the left out data. + + best_params_ : dict + Parameter setting that gave the best results on the hold out data. + """ + def __init__(self, estimator, parameters, metric=None, n_draws=10, + p_samples=.8, random_state=None, n_jobs=-1, + pre_dispatch='2*n_jobs', verbose=0): super(StabilitySearch, self).__init__( - estimator, parameters) + estimator, parameters, n_jobs, pre_dispatch, verbose) self.metric = metric self.n_draws = n_draws self.p_samples = p_samples self.random_state = random_state def _initialized(self, X, y): + """Define the subsampled sets used by _estimator_fit.""" n_samples, _ = X.shape rng = check_random_state(self.random_state) self.data_ = ( rng.uniform(size=(self.n_draws, 2 * n_samples)) < self.p_samples) def _estimator_fit(self, estimator, X, y, parameters): + """Fit the estimator and return values used to define a score.""" estimator.set_params(**parameters) + # Be sure a warining is the best thing to do ? if parameters['n_clusters'] == 1: - warnings.warn('Put a warning.') + warnings.warn('StabilitySearch can not be used for n_clusers = 1.') return np.nan, parameters draw_scores = np.empty(self.n_draws) @@ -218,6 +394,7 @@ def _estimator_fit(self, estimator, X, y, parameters): return score, parameters def _compute_results(self, X, out): + """Compute result from the values computed by _estimator_fit.""" scores, parameters = zip(*out) return { 'score': np.array(scores), @@ -227,21 +404,96 @@ def _compute_results(self, X, out): # Scorer for distortion jump class DistortionJumpSearch(_NClusterSearchBase): - def __init__(self, estimator, parameters): + """Search on hyper parameters using the distortion jump scheme. + + DistortionJumpSearch implements an exhaustive search over specified + parameter values for a clustering estimator using a distortion jump + process to define the best result. + + Parameters + ---------- + estimator : estimator object. + This is assumed to implement the scikit-learn estimator interface. + Either estimator needs to provide a ``score`` function, + or ``metric`` must be passed. + + parameters : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + metric : callable + A supervised metric function with signature + ``metric(y_true, y_pred, **kwargs)``. + + n_draws : int, defaults to 10 + Number of draws used to define the score. + + p_samples: float, defaults to .8 + Probability to draw point to define the sample sets. + + n_jobs : int, defaults to 1 + Number of jobs to run in parallel. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + verbose : integer + Controls the verbosity: the higher, the more messages. 
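+
+    Notes
+    -----
+    A sketch of the criterion implemented below (the "jump method" of
+    Sugar and James): the estimator's distortion d_K is transformed into
+    (d_K / n_features) ** (-n_features / 2) and each K is scored by the
+    difference between the transformed values at K and at K + 1, the
+    largest difference ranking first.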
+ + Attributes + ---------- + results_ : list of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + + best_estimator_ : estimator + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. Not available if refit=False. + + best_score_ : float + Score of best_estimator on the left out data. + + best_params_ : dict + Parameter setting that gave the best results on the hold out data. + """ + def __init__(self, estimator, parameters, metric=None, n_jobs=-1, + pre_dispatch='2*n_jobs', verbose=0): super(DistortionJumpSearch, self).__init__( - estimator, parameters) + estimator, parameters, n_jobs, pre_dispatch, verbose) + self.metric = metric def _compute_values(self, n_clusters_list): + """Define all n_clusters values needed to compute a score.""" return np.hstack((n_clusters_list, np.array(n_clusters_list) + 1)) def _estimator_fit(self, estimator, X, y, parameters): + """Fit the estimator and return values used to define a score.""" _, n_features = X.shape estimator.set_params(**parameters) - distortion = (np.array(estimator.fit(X).score(X)) / - n_features) ** (-n_features / 2) + score = self._compute_score(estimator, X) + distortion = (score / n_features) ** (-n_features / 2) return distortion, parameters def _compute_results(self, X, out): + """Compute result from the values computed by _estimator_fit.""" distortion, parameters = zip(*out) distortion = np.array(distortion).reshape( len(self.n_clusters_values), -1) @@ -256,20 +508,96 @@ def _compute_results(self, X, out): # Scorer for Pham class PhamSearch(_NClusterSearchBase): - def __init__(self, estimator, parameters): + """Unsupervised metric search on hyper parameters. + + StabilitySearch implements an exhaustive search over specified + parameter values for a clustering estimator using a stability search + process to define the best result. + + Parameters + ---------- + estimator : estimator object. + This is assumed to implement the scikit-learn estimator interface. + Either estimator needs to provide a ``score`` function, + or ``metric`` must be passed. + + parameters : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + metric : callable + A supervised metric function with signature + ``metric(y_true, y_pred, **kwargs)``. + + n_draws : int, defaults to 10 + Number of draws used to define the score. + + p_samples: float, defaults to .8 + Probability to draw point to define the sample sets. + + n_jobs : int, defaults to 1 + Number of jobs to run in parallel. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. 
Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + verbose : integer + Controls the verbosity: the higher, the more messages. + + Attributes + ---------- + results_ : list of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + + best_estimator_ : estimator + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. Not available if refit=False. + + best_score_ : float + Score of best_estimator on the left out data. + + best_params_ : dict + Parameter setting that gave the best results on the hold out data. + """ + def __init__(self, estimator, parameters, metric=None): super(PhamSearch, self).__init__( estimator, parameters) + self.metric = metric def _compute_values(self, n_clusters_list): + """Define all n_clusters values needed to compute a score.""" return np.hstack((n_clusters_list, np.array(n_clusters_list) - 1)) def _estimator_fit(self, estimator, X, y, parameters): + """Fit the estimator and return values used to define a score.""" estimator.set_params(**parameters) - if parameters['n_clusters'] == 0: + if parameters['n_clusters'] < 1: return np.nan, parameters - return estimator.fit(X).score(X), parameters + # XXX I clearly don't like to use any unsupervised metric here. + score = self._compute_score(estimator, X) + return score, parameters def _compute_results(self, X, out): + """Compute result from the values computed by _estimator_fit.""" _, n_features = X.shape scores, parameters = zip(*out) @@ -281,15 +609,19 @@ def _compute_results(self, X, out): np.log(n_features)) with ignore_warnings(category=RuntimeWarning): - scores = np.where( - scores[self._index] != 0., scores[self._index] / + scores = np.where(np.logical_and( + np.vstack(( + True, + ~np.isnan(scores[self._index])[:-1])), + scores[self._index] > 0), scores[self._index] / (weights[:, np.newaxis] * scores[:-1][self._index[1:]]), 1.) if self.n_clusters_values[0] == 0: scores[0, :] = 1. # XXX change that to not modify the score - scores[scores > 0.85] = 1. + with ignore_warnings(category=RuntimeWarning): + scores[scores > 0.85] = 1. return { 'score': -scores.ravel(), @@ -299,13 +631,85 @@ def _compute_results(self, X, out): # Scorer for Gap class GapSearch(_NClusterSearchBase): - def __init__(self, estimator, parameters, n_draws=10, n_jobs=1, - random_state=None): + """Unsupervised metric search on hyper parameters. + + StabilitySearch implements an exhaustive search over specified + parameter values for a clustering estimator using a stability search + process to define the best result. + + Parameters + ---------- + estimator : estimator object. + This is assumed to implement the scikit-learn estimator interface. + Either estimator needs to provide a ``score`` function, + or ``metric`` must be passed. + + parameters : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + metric : callable + A supervised metric function with signature + ``metric(y_true, y_pred, **kwargs)``. 
+ + n_draws : int, defaults to 10 + Number of draws used to define the score. + + p_samples: float, defaults to .8 + Probability to draw point to define the sample sets. + + n_jobs : int, defaults to 1 + Number of jobs to run in parallel. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + verbose : integer + Controls the verbosity: the higher, the more messages. + + Attributes + ---------- + results_ : list of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + + best_estimator_ : estimator + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. Not available if refit=False. + + best_score_ : float + Score of best_estimator on the left out data. + + best_params_ : dict + Parameter setting that gave the best results on the hold out data. + """ + def __init__(self, estimator, parameters, metric=None, n_draws=10, + n_jobs=1, random_state=None): super(GapSearch, self).__init__(estimator, parameters, n_jobs) + self.metric = metric self.n_draws = n_draws self.random_state = random_state def _initialized(self, X, y): + """Define the subsampled sets used by _estimator_fit.""" n_samples, n_features = X.shape rng = check_random_state(self.random_state) @@ -315,15 +719,33 @@ def _initialized(self, X, y): (bb_max - bb_min) + bb_min) def _compute_values(self, n_clusters_list): + """Define all n_clusters values needed to compute a score.""" return np.hstack((n_clusters_list, np.array(n_clusters_list) + 1)) + # def _compute_score(self, estimator, X): + # """Compute result from the values computed by _estimator_fit.""" + # if self.metric is not None: + # # Add a warning for n_cluster = 1 + # if estimator.n_clusters <= 1: + # return np.nan + # score = self.metric(X, estimator.fit_predict(X)) + # elif hasattr(estimator, 'score'): + # score = estimator.fit(X).score(X) + # else: + # raise ValueError("The estimator does not have score_ attribute.") + # + # return score + def _estimator_fit(self, estimator, X, y, parameters): + """Fit the estimator and return values used to define a score.""" estimator.set_params(**parameters) - estimated_log_inertia = np.log(-estimator.fit(X).score(X)) inertia_n_draws = np.empty(self.n_draws) - for t, Xt in enumerate(self._data): - inertia_n_draws[t] = -estimator.fit(Xt).score(Xt) + with ignore_warnings(category=RuntimeWarning): + estimated_log_inertia = np.log( + np.abs(self._compute_score(estimator, X))) + for t, Xt in enumerate(self._data): + inertia_n_draws[t] = np.abs(self._compute_score(estimator, Xt)) inertia_n_draws = np.log(inertia_n_draws) expected_log_inertia = np.mean(inertia_n_draws) @@ -334,14 +756,15 @@ def _estimator_fit(self, estimator, X, y, parameters): return gap, safety, parameters def _compute_results(self, X, out): + """Compute result from the values computed by _estimator_fit.""" gap, safety, parameters = zip(*out) gap = 
np.array(gap).reshape(len(self.n_clusters_values), -1) safety = np.array(safety).reshape(gap.shape) - scores = (gap[self._index] - gap[1:][self._index[:-1]] + - safety[1:][self._index[:-1]]) >= 0 - + with ignore_warnings(category=RuntimeWarning): + scores = ((gap[self._index] - gap[1:][self._index[:-1]] + + safety[1:][self._index[:-1]]) >= 0).astype(gap.dtype) return { 'gap': gap[self._index].ravel(), 'safety': safety[self._index].ravel(), @@ -352,6 +775,76 @@ def _compute_results(self, X, out): class OptimalNClusterSearch(): + """Unsupervised metric search on hyper parameters. + + StabilitySearch implements an exhaustive search over specified + parameter values for a clustering estimator using a stability search + process to define the best result. + + Parameters + ---------- + estimator : estimator object. + This is assumed to implement the scikit-learn estimator interface. + Either estimator needs to provide a ``score`` function, + or ``metric`` must be passed. + + parameters : dict or list of dictionaries + Dictionary with parameters names (string) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + metric : callable + A supervised metric function with signature + ``metric(y_true, y_pred, **kwargs)``. + + n_draws : int, defaults to 10 + Number of draws used to define the score. + + p_samples: float, defaults to .8 + Probability to draw point to define the sample sets. + + n_jobs : int, defaults to 1 + Number of jobs to run in parallel. + + pre_dispatch : int, or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + verbose : integer + Controls the verbosity: the higher, the more messages. + + Attributes + ---------- + results_ : list of named tuples + Contains scores for all parameter combinations in param_grid. + Each entry corresponds to one parameter setting. + + best_estimator_ : estimator + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. Not available if refit=False. + + best_score_ : float + Score of best_estimator on the left out data. + + best_params_ : dict + Parameter setting that gave the best results on the hold out data. + """ def __init__(self, estimator, parameters, fitting_process='auto', **kwargs): self.estimator = estimator @@ -381,6 +874,19 @@ def _select_fitting_method(self): **self.kwargs) def fit(self, X, y=None): + """Run fit with all sets of parameters. + + Parameters + ---------- + + X : array-like, shape = [n_samples, n_features] + Training vector, where n_samples is the number of samples and + n_features is the number of features. + + y : array-like, shape = [n_samples] or [n_samples, n_output], optional + Target relative to X for classification or regression; + None for unsupervised learning. 
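+
+        Examples
+        --------
+        A minimal sketch on hypothetical data (the default 'pham'
+        fitting process; output omitted while the API is WIP):
+
+        >>> import numpy as np
+        >>> from sklearn.cluster import KMeans, OptimalNClusterSearch
+        >>> X = np.random.RandomState(0).rand(100, 2)
+        >>> search = OptimalNClusterSearch(
+        ...     estimator=KMeans(random_state=0),
+        ...     parameters={'n_clusters': np.arange(1, 6)}).fit(X)
+        >>> search.best_params_  # doctest: +SKIP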
+ """ self._check_initial_parameters(X) self._select_fitting_method()