Second code review changes · scikit-learn/scikit-learn@cd52612 · GitHub

Commit cd52612

Second code review changes
- Fix incorrectly parsed documentation block
- Fix references indentation
- Refactor test assertions
1 parent a588458 commit cd52612

File tree

3 files changed: +56 −56 lines

3 files changed

+56
-56
lines changed

doc/modules/clustering.rst

Lines changed: 16 additions & 18 deletions
@@ -1621,14 +1621,15 @@ Then the DB index is defined as:
 .. math::

    DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij}

+A lower index value is indicative of a better clustering partition.
+
+In normal usage, the Davies-Bouldin index is applied to the results of a
+cluster analysis as follows:

   >>> from sklearn import datasets
   >>> iris = datasets.load_iris()
   >>> X = iris.data

-In normal usage, the Davies-Bouldin index is applied to the results of a
-cluster analysis.
-
   >>> from sklearn.cluster import KMeans
   >>> from sklearn.metrics import davies_bouldin_index
   >>> kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
@@ -1642,9 +1643,6 @@ Advantages

 - The computation of Davies-Bouldin is simpler than that of Silhouette scores.

-- Contrary to Calinski-Harabaz, Davies-Bouldin is bounded to values between 0
-  and 1.
-
 - The index is computed using only quantities and features inherent to the dataset.

 Drawbacks
@@ -1660,20 +1658,20 @@ Drawbacks

 .. topic:: References

-   * Davies, David L.; Bouldin, Donald W. (1979).
-     "A Cluster Separation Measure"
-     IEEE Transactions on Pattern Analysis and Machine Intelligence.
-     PAMI-1 (2): 224-227.
-     `doi:10.1109/TPAMI.1979.4766909 <http://dx.doi.org/10.1109/TPAMI.1979.4766909>`_.
+ * Davies, David L.; Bouldin, Donald W. (1979).
+   "A Cluster Separation Measure"
+   IEEE Transactions on Pattern Analysis and Machine Intelligence.
+   PAMI-1 (2): 224-227.
+   `doi:10.1109/TPAMI.1979.4766909 <http://dx.doi.org/10.1109/TPAMI.1979.4766909>`_.
+
+ * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001).
+   "On Clustering Validation Techniques"
+   Journal of Intelligent Information Systems, 17(2-3), 107-145.
+   `doi:10.1023/A:1012801612483 <http://dx.doi.org/10.1023/A:1012801612483>`_.

-   * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001).
-     "On Clustering Validation Techniques"
-     Journal of Intelligent Information Systems, 17(2-3), 107-145.
-     `doi:10.1023/A:1012801612483 <http://dx.doi.org/10.1023/A:1012801612483>`_.
+ * `Wikipedia entry for Davies-Bouldin index
+   <https://en.wikipedia.org/wiki/Davies–Bouldin_index>`_.

-   * `Wikipedia entry for Davies-Bouldin index
-     <https://en.wikipedia.org/wiki/Davies–Bouldin_index>`_.
-

 .. _contingency_matrix:
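For context on the formula in this hunk: each R_{ij} term follows the standard Davies-Bouldin definition, R_ij = (s_i + s_j) / d_ij, where s_i is the mean distance of cluster i's points to their centroid and d_ij is the distance between the centroids of clusters i and j. A minimal NumPy sketch of the formula (an illustration only, not the implementation this PR adds; the name db_index_sketch is hypothetical):

import numpy as np

def db_index_sketch(X, labels):
    # Sketch of DB = (1/k) * sum_i max_{j != i} (s_i + s_j) / d_ij;
    # assumes at least two clusters with distinct centroids.
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    clusters = np.unique(labels)
    k = len(clusters)
    centroids = np.array([X[labels == c].mean(axis=0) for c in clusters])
    # s_i: mean distance of cluster i's members to its centroid
    s = np.array([np.linalg.norm(X[labels == c] - centroids[i], axis=1).mean()
                  for i, c in enumerate(clusters)])
    total = 0.0
    for i in range(k):
        # R_ij, maximized over all j != i
        total += max((s[i] + s[j]) / np.linalg.norm(centroids[i] - centroids[j])
                     for j in range(k) if j != i)
    return total / k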

doc/whats_new/v0.20.rst

Lines changed: 3 additions & 0 deletions
@@ -99,6 +99,9 @@ Preprocessing

 Model evaluation

+- Added the :func:`metrics.cluster.davies_bouldin_index` metric for unsupervised
+  evaluation of clustering models. :issue:`10827` by :user:`Luis Osa <logc>`.
+
 - Added the :func:`metrics.balanced_accuracy_score` metric and a corresponding
   ``'balanced_accuracy'`` scorer for binary classification.
   :issue:`8066` by :user:`xyguo` and :user:`Aman Dalmia <dalmia>`.
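The new metric takes the data and the predicted cluster labels, as the tests below and the clustering.rst doctest above show. A minimal usage sketch mirroring that doctest (assuming the import path this commit exposes):

from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_index  # added by this PR

# Cluster the iris data and score the resulting partition; lower is better.
X = datasets.load_iris().data
kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
print(davies_bouldin_index(X, kmeans.labels_))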

sklearn/metrics/cluster/tests/test_unsupervised.py

Lines changed: 37 additions & 38 deletions
@@ -1,10 +1,10 @@
 import numpy as np
 import scipy.sparse as sp
+import pytest
 from scipy.sparse import csr_matrix

 from sklearn import datasets
 from sklearn.utils.testing import assert_false
-from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_raises_regexp
@@ -34,13 +34,13 @@ def test_silhouette():
         assert_greater(score_precomputed, 0)
         # Test without calculating D
         score_euclidean = silhouette_score(X, y, metric='euclidean')
-        assert_almost_equal(score_precomputed, score_euclidean)
+        assert score_precomputed == pytest.approx(score_euclidean)

         if X is X_dense:
             score_dense_without_sampling = score_precomputed
         else:
-            assert_almost_equal(score_euclidean,
-                                score_dense_without_sampling)
+            assert score_euclidean == pytest.approx(
+                score_dense_without_sampling)

         # Test with sampling
         score_precomputed = silhouette_score(D, y, metric='precomputed',
@@ -51,12 +51,12 @@ def test_silhouette():
                                              random_state=0)
         assert_greater(score_precomputed, 0)
         assert_greater(score_euclidean, 0)
-        assert_almost_equal(score_euclidean, score_precomputed)
+        assert score_euclidean == pytest.approx(score_precomputed)

         if X is X_dense:
             score_dense_with_sampling = score_precomputed
         else:
-            assert_almost_equal(score_euclidean, score_dense_with_sampling)
+            assert score_euclidean == pytest.approx(score_dense_with_sampling)


 def test_cluster_size_1():
@@ -121,12 +121,14 @@ def test_silhouette_paper_example():
                                 (labels2, expected2, score2)]:
         expected = [expected[name] for name in names]
         # we check to 2dp because that's what's in the paper
-        assert_almost_equal(expected, silhouette_samples(D, np.array(labels),
-                                                         metric='precomputed'),
-                            decimal=2)
-        assert_almost_equal(score, silhouette_score(D, np.array(labels),
-                                                    metric='precomputed'),
-                            decimal=2)
+        assert silhouette_samples(D, np.array(labels),
+                                  metric='precomputed') == pytest.approx(
+                                      expected, abs=1e-2)
+        assert silhouette_score(D, np.array(labels),
+                                metric='precomputed') == pytest.approx(
+                                    score, abs=1e-2)


 def test_correct_labelsize():
@@ -167,19 +169,27 @@ def test_non_numpy_labels():
         silhouette_score(list(X), list(y)), silhouette_score(X, y))


-def test_calinski_harabaz_score():
+def assert_raises_on_only_one_label(func):
+    """Assert message when there is only one label"""
     rng = np.random.RandomState(seed=0)
-
-    # Assert message when there is only one label
     assert_raise_message(ValueError, "Number of labels is",
-                         calinski_harabaz_score,
+                         func,
                          rng.rand(10, 2), np.zeros(10))

-    # Assert message when all points are in different clusters
+
+def assert_raises_on_all_points_different_clusters(func):
+    """Assert message when all points are in different clusters"""
+    rng = np.random.RandomState(seed=0)
     assert_raise_message(ValueError, "Number of labels is",
-                         calinski_harabaz_score,
+                         func,
                          rng.rand(10, 2), np.arange(10))

+
+def test_calinski_harabaz_score():
+    assert_raises_on_only_one_label(calinski_harabaz_score)
+
+    assert_raises_on_all_points_different_clusters(calinski_harabaz_score)
+
     # Assert the value is 1. when all samples are equal
     assert_equal(1., calinski_harabaz_score(np.ones((10, 2)),
                                             [0] * 5 + [1] * 5))
@@ -192,40 +202,29 @@ def test_calinski_harabaz_score():
     X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
          [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
     labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
-    assert_almost_equal(calinski_harabaz_score(X, labels),
-                        45 * (40 - 4) / (5 * (4 - 1)))
+    assert calinski_harabaz_score(X, labels) == pytest.approx(
+        45 * (40 - 4) / (5 * (4 - 1)))


 def test_davies_bouldin_index():
-    rng = np.random.RandomState(seed=0)
-
-    # Assert message when there is only one label
-    assert_raise_message(ValueError, "Number of labels is",
-                         davies_bouldin_index,
-                         rng.rand(10, 2), np.zeros(10))
-
-    # Assert message when all points are in different clusters
-    assert_raise_message(ValueError, "Number of labels is",
-                         davies_bouldin_index,
-                         rng.rand(10, 2), np.arange(10))
+    assert_raises_on_only_one_label(davies_bouldin_index)
+    assert_raises_on_all_points_different_clusters(davies_bouldin_index)

     # Assert the value is 0. when all samples are equal
-    assert 0. == davies_bouldin_index(np.ones((10, 2)),
-                                      [0] * 5 + [1] * 5)
+    assert davies_bouldin_index(np.ones((10, 2)),
+                                [0] * 5 + [1] * 5) == pytest.approx(0.0)

     # Assert the value is 0. when all cluster means are equal
-    assert 0. == davies_bouldin_index([[-1, -1], [1, 1]] * 10,
-                                      [0] * 10 + [1] * 10)
+    assert davies_bouldin_index([[-1, -1], [1, 1]] * 10,
+                                [0] * 10 + [1] * 10) == pytest.approx(0.0)

     # General case (with non numpy arrays)
     X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
          [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
     labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
-    assert_almost_equal(davies_bouldin_index(X, labels),
-                        2 * np.sqrt(0.5) / 3)
+    assert davies_bouldin_index(X, labels) == pytest.approx(2 * np.sqrt(0.5) / 3)

     # General case - some clusters have one sample
     X = ([[0, 0], [2, 2], [3, 3], [5, 5]])
     labels = [0, 0, 1, 2]
-    assert_almost_equal(davies_bouldin_index(X, labels),
-                        (5. / 4) / 3)
+    assert davies_bouldin_index(X, labels) == pytest.approx((5. / 4) / 3)
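One note on the assertion style used throughout the refactored tests above: pytest.approx only performs a comparison when used with ==. A bare pytest.approx(a, b) call merely builds an approx object and asserts nothing, which is why each check is written as assert actual == pytest.approx(expected). A small self-contained illustration:

import pytest

# pytest.approx takes effect only inside a comparison; a bare call
# builds an approx object and asserts nothing.
assert 0.1 + 0.2 == pytest.approx(0.3)

# Sequences (and numpy arrays) are compared element-wise.
assert [0.1 + 0.2, 0.3] == pytest.approx([0.3, 0.3])

# abs= sets an absolute tolerance, e.g. a two-decimal-place check
# like the decimal=2 assertions these tests replaced.
assert 0.301 == pytest.approx(0.3, abs=1e-2)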

0 commit comments

Comments
 (0)
0