Second code review changes · scikit-learn/scikit-learn@513e45f · GitHub
[go: up one dir, main page]

Skip to content

Commit 513e45f

Browse files
committed
Second code review changes
- Fix incorrectly parsed documentation block - Fix references indentation - Refactor test assertions
1 parent 873e92d commit 513e45f

File tree

2 files changed

+52
-53
lines changed

2 files changed

+52
-53
lines changed

doc/modules/clustering.rst

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,13 +1622,13 @@ Then the DB index is defined as:
16221622
DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij}
16231623
16241624
1625+
In normal usage, the Davies-Bouldin index is applied to the results of a
1626+
cluster analysis.
1627+
16251628
>>> from sklearn import datasets
16261629
>>> iris = datasets.load_iris()
16271630
>>> X = iris.data
16281631

1629-
In normal usage, the Davies-Bouldin index is applied to the results of a
1630-
cluster analysis.
1631-
16321632
>>> from sklearn.cluster import KMeans
16331633
>>> from sklearn.metrics import davies_bouldin_index
16341634
>>> kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
@@ -1660,20 +1660,20 @@ Drawbacks
16601660

16611661
.. topic:: References
16621662

1663-
* Davies, David L.; Bouldin, Donald W. (1979).
1664-
"A Cluster Separation Measure"
1665-
IEEE Transactions on Pattern Analysis and Machine Intelligence.
1666-
PAMI-1 (2): 224-227.
1667-
`doi:10.1109/TPAMI.1979.4766909 <http://dx.doi.org/10.1109/TPAMI.1979.4766909>`_.
1663+
* Davies, David L.; Bouldin, Donald W. (1979).
1664+
"A Cluster Separation Measure"
1665+
IEEE Transactions on Pattern Analysis and Machine Intelligence.
1666+
PAMI-1 (2): 224-227.
1667+
`doi:10.1109/TPAMI.1979.4766909 <http://dx.doi.org/10.1109/TPAMI.1979.4766909>`_.
1668+
1669+
* Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001).
1670+
"On Clustering Validation Techniques"
1671+
Journal of Intelligent Information Systems, 17(2-3), 107-145.
1672+
`doi:10.1023/A:1012801612483 <http://dx.doi.org/10.1023/A:1012801612483>`_.
16681673

1669-
* Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001).
1670-
"On Clustering Validation Techniques"
1671-
Journal of Intelligent Information Systems, 17(2-3), 107-145.
1672-
`doi:10.1023/A:1012801612483 <http://dx.doi.org/10.1023/A:1012801612483>`_.
1674+
* `Wikipedia entry for Davies-Bouldin index
1675+
<https://en.wikipedia.org/wiki/Davies–Bouldin_index>`_.
16731676

1674-
* `Wikipedia entry for Davies-Bouldin index
1675-
<https://en.wikipedia.org/wiki/Davies–Bouldin_index>`_.
1676-
16771677

16781678
.. _contingency_matrix:
16791679

sklearn/metrics/cluster/tests/test_unsupervised.py

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import numpy as np
22
import scipy.sparse as sp
3+
import pytest
34
from scipy.sparse import csr_matrix
45

56
from sklearn import datasets
67
from sklearn.utils.testing import assert_false
7-
from sklearn.utils.testing import assert_almost_equal
88
from sklearn.utils.testing import assert_array_equal
99
from sklearn.utils.testing import assert_equal
1010
from sklearn.utils.testing import assert_raises_regexp
@@ -34,13 +34,13 @@ def test_silhouette():
3434
assert_greater(score_precomputed, 0)
3535
# Test without calculating D
3636
score_euclidean = silhouette_score(X, y, metric='euclidean')
37-
assert_almost_equal(score_precomputed, score_euclidean)
37+
pytest.approx(score_precomputed, score_euclidean)
3838

3939
if X is X_dense:
4040
score_dense_without_sampling = score_precomputed
4141
else:
42-
assert_almost_equal(score_euclidean,
43-
score_dense_without_sampling)
42+
pytest.approx(score_euclidean,
43+
score_dense_without_sampling)
4444

4545
# Test with sampling
4646
score_precomputed = silhouette_score(D, y, metric='precomputed',
@@ -51,12 +51,12 @@ def test_silhouette():
5151
random_state=0)
5252
assert_greater(score_precomputed, 0)
5353
assert_greater(score_euclidean, 0)
54-
assert_almost_equal(score_euclidean, score_precomputed)
54+
pytest.approx(score_euclidean, score_precomputed)
5555

5656
if X is X_dense:
5757
score_dense_with_sampling = score_precomputed
5858
else:
59-
assert_almost_equal(score_euclidean, score_dense_with_sampling)
59+
pytest.approx(score_euclidean, score_dense_with_sampling)
6060

6161

6262
def test_cluster_size_1():
@@ -121,12 +121,14 @@ def test_silhouette_paper_example():
121121
(labels2, expected2, score2)]:
122122
expected = [expected[name] for name in names]
123123
# we check to 2dp because that's what's in the paper
124-
assert_almost_equal(expected, silhouette_samples(D, np.array(labels),
125-
metric='precomputed'),
126-
decimal=2)
127-
assert_almost_equal(score, silhouette_score(D, np.array(labels),
128-
metric='precomputed'),
129-
decimal=2)
124+
pytest.approx(expected,
125+
silhouette_samples(D, np.array(labels),
126+
metric='precomputed'),
127+
abs=1e-2)
128+
pytest.approx(score,
129+
silhouette_score(D, np.array(labels),
130+
metric='precomputed'),
131+
abs=1e-2)
130132

131133

132134
def test_correct_labelsize():
@@ -167,19 +169,27 @@ def test_non_numpy_labels():
167169
silhouette_score(list(X), list(y)), silhouette_score(X, y))
168170

169171

170-
def test_calinski_harabaz_score():
172+
def assert_raises_on_only_one_label(func):
173+
"""Assert message when there is only one label"""
171174
rng = np.random.RandomState(seed=0)
172-
173-
# Assert message when there is only one label
174175
assert_raise_message(ValueError, "Number of labels is",
175-
calinski_harabaz_score,
176+
func,
176177
rng.rand(10, 2), np.zeros(10))
177178

178-
# Assert message when all point are in different clusters
179+
180+
def assert_raises_on_all_points_same_cluster(func):
181+
"""Assert message when all point are in different clusters"""
182+
rng = np.random.RandomState(seed=0)
179183
assert_raise_message(ValueError, "Number of labels is",
180-
calinski_harabaz_score,
184+
func,
181185
rng.rand(10, 2), np.arange(10))
182186

187+
188+
def test_calinski_harabaz_score():
189+
assert_raises_on_only_one_label(calinski_harabaz_score)
190+
191+
assert_raises_on_all_points_same_cluster(calinski_harabaz_score)
192+
183193
# Assert the value is 1. when all samples are equals
184194
assert_equal(1., calinski_harabaz_score(np.ones((10, 2)),
185195
[0] * 5 + [1] * 5))
@@ -192,40 +202,29 @@ def test_calinski_harabaz_score():
192202
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
193203
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
194204
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
195-
assert_almost_equal(calinski_harabaz_score(X, labels),
205+
pytest.approx(calinski_harabaz_score(X, labels),
196206
45 * (40 - 4) / (5 * (4 - 1)))
197207

198208

199209
def test_davies_bouldin_index():
200-
rng = np.random.RandomState(seed=0)
201-
202-
# Assert message when there is only one label
203-
assert_raise_message(ValueError, "Number of labels is",
204-
davies_bouldin_index,
205-
rng.rand(10, 2), np.zeros(10))
206-
207-
# Assert message when all point are in different clusters
208-
assert_raise_message(ValueError, "Number of labels is",
209-
davies_bouldin_index,
210-
rng.rand(10, 2), np.arange(10))
210+
assert_raises_on_only_one_label(davies_bouldin_index)
211+
assert_raises_on_all_points_same_cluster(davies_bouldin_index)
211212

212213
# Assert the value is 0. when all samples are equals
213-
assert 0. == davies_bouldin_index(np.ones((10, 2)),
214-
[0] * 5 + [1] * 5)
214+
assert davies_bouldin_index(np.ones((10, 2)),
215+
[0] * 5 + [1] * 5) == pytest.approx(0.0)
215216

216217
# Assert the value is 0. when all the mean cluster are equal
217-
assert 0. == davies_bouldin_index([[-1, -1], [1, 1]] * 10,
218-
[0] * 10 + [1] * 10)
218+
assert davies_bouldin_index([[-1, -1], [1, 1]] * 10,
219+
[0] * 10 + [1] * 10) == pytest.approx(0.0)
219220

220221
# General case (with non numpy arrays)
221222
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
222223
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
223224
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
224-
assert_almost_equal(davies_bouldin_index(X, labels),
225-
2 * np.sqrt(0.5) / 3)
225+
pytest.approx(davies_bouldin_index(X, labels), 2 * np.sqrt(0.5) / 3)
226226

227227
# General case - cluster have one sample
228228
X = ([[0, 0], [2, 2], [3, 3], [5, 5]])
229229
labels = [0, 0, 1, 2]
230-
assert_almost_equal(davies_bouldin_index(X, labels),
231-
(5. / 4) / 3)
230+
pytest.approx(davies_bouldin_index(X, labels), (5. / 4) / 3)

0 commit comments

Comments
 (0)
0