1
1
import numpy as np
2
2
import scipy .sparse as sp
3
+ import pytest
3
4
from scipy .sparse import csr_matrix
4
5
5
6
from sklearn import datasets
6
7
from sklearn .utils .testing import assert_false
7
- from sklearn .utils .testing import assert_almost_equal
8
8
from sklearn .utils .testing import assert_array_equal
9
9
from sklearn .utils .testing import assert_equal
10
10
from sklearn .utils .testing import assert_raises_regexp
@@ -34,13 +34,13 @@ def test_silhouette():
34
34
assert_greater (score_precomputed , 0 )
35
35
# Test without calculating D
36
36
score_euclidean = silhouette_score (X , y , metric = 'euclidean' )
37
- assert_almost_equal (score_precomputed , score_euclidean )
37
+ assert score_precomputed == pytest.approx(score_euclidean)
38
38
39
39
if X is X_dense :
40
40
score_dense_without_sampling = score_precomputed
41
41
else :
42
- assert_almost_equal (score_euclidean ,
43
- score_dense_without_sampling )
42
+ assert score_euclidean == pytest.approx(
43
+ score_dense_without_sampling)
44
44
45
45
# Test with sampling
46
46
score_precomputed = silhouette_score (D , y , metric = 'precomputed' ,
@@ -51,12 +51,12 @@ def test_silhouette():
51
51
random_state = 0 )
52
52
assert_greater (score_precomputed , 0 )
53
53
assert_greater (score_euclidean , 0 )
54
- assert_almost_equal (score_euclidean , score_precomputed )
54
+ assert score_euclidean == pytest.approx(score_precomputed)
55
55
56
56
if X is X_dense :
57
57
score_dense_with_sampling = score_precomputed
58
58
else :
59
- assert_almost_equal (score_euclidean , score_dense_with_sampling )
59
+ assert score_euclidean == pytest.approx(score_dense_with_sampling)
60
60
61
61
62
62
def test_cluster_size_1 ():
@@ -121,12 +121,14 @@ def test_silhouette_paper_example():
121
121
(labels2 , expected2 , score2 )]:
122
122
expected = [expected [name ] for name in names ]
123
123
# we check to 2dp because that's what's in the paper
124
- assert_almost_equal (expected , silhouette_samples (D , np .array (labels ),
125
- metric = 'precomputed' ),
126
- decimal = 2 )
127
- assert_almost_equal (score , silhouette_score (D , np .array (labels ),
128
- metric = 'precomputed' ),
129
- decimal = 2 )
124
+ assert expected == pytest.approx(
125
+ silhouette_samples(D, np.array(labels),
126
+ metric='precomputed'),
127
+ abs=1e-2)
128
+ assert score == pytest.approx(
129
+ silhouette_score(D, np.array(labels),
130
+ metric='precomputed'),
131
+ abs=1e-2)
130
132
131
133
132
134
def test_correct_labelsize ():
@@ -167,19 +169,27 @@ def test_non_numpy_labels():
167
169
silhouette_score (list (X ), list (y )), silhouette_score (X , y ))
168
170
169
171
170
- def test_calinski_harabaz_score ():
172
+ def assert_raises_on_only_one_label (func ):
173
+ """Check that func(X, labels) raises ValueError ('Number of labels is') when all 10 samples share label 0"""
171
174
rng = np .random .RandomState (seed = 0 )
172
-
173
- # Assert message when there is only one label
174
175
assert_raise_message (ValueError , "Number of labels is" ,
175
- calinski_harabaz_score ,
176
+ func ,
176
177
rng .rand (10 , 2 ), np .zeros (10 ))
177
178
178
- # Assert message when all point are in different clusters
179
+
180
+ def assert_raises_on_all_points_same_cluster (func ):
181
+ """Check that func(X, labels) raises ValueError ('Number of labels is') when every point has its own label (distinct clusters, despite this helper's name)"""
182
+ rng = np .random .RandomState (seed = 0 )
179
183
assert_raise_message (ValueError , "Number of labels is" ,
180
- calinski_harabaz_score ,
184
+ func ,
181
185
rng .rand (10 , 2 ), np .arange (10 ))
182
186
187
+
188
+ def test_calinski_harabaz_score ():
189
+ assert_raises_on_only_one_label (calinski_harabaz_score )
190
+
191
+ assert_raises_on_all_points_same_cluster (calinski_harabaz_score )
192
+
183
193
# Assert the value is 1. when all samples are equals
184
194
assert_equal (1. , calinski_harabaz_score (np .ones ((10 , 2 )),
185
195
[0 ] * 5 + [1 ] * 5 ))
@@ -192,40 +202,29 @@ def test_calinski_harabaz_score():
192
202
X = ([[0 , 0 ], [1 , 1 ]] * 5 + [[3 , 3 ], [4 , 4 ]] * 5 +
193
203
[[0 , 4 ], [1 , 3 ]] * 5 + [[3 , 1 ], [4 , 0 ]] * 5 )
194
204
labels = [0 ] * 10 + [1 ] * 10 + [2 ] * 10 + [3 ] * 10
195
- assert_almost_equal (calinski_harabaz_score (X , labels ),
205
+ assert calinski_harabaz_score(X, labels) == pytest.approx(
196
206
45 * (40 - 4 ) / (5 * (4 - 1 )))
197
207
198
208
199
209
def test_davies_bouldin_index ():
200
- rng = np .random .RandomState (seed = 0 )
201
-
202
- # Assert message when there is only one label
203
- assert_raise_message (ValueError , "Number of labels is" ,
204
- davies_bouldin_index ,
205
- rng .rand (10 , 2 ), np .zeros (10 ))
206
-
207
- # Assert message when all point are in different clusters
208
- assert_raise_message (ValueError , "Number of labels is" ,
209
- davies_bouldin_index ,
210
- rng .rand (10 , 2 ), np .arange (10 ))
210
+ assert_raises_on_only_one_label (davies_bouldin_index )
211
+ assert_raises_on_all_points_same_cluster (davies_bouldin_index )
211
212
212
213
# Assert the value is 0. when all samples are equals
213
- assert 0. == davies_bouldin_index (np .ones ((10 , 2 )),
214
- [0 ] * 5 + [1 ] * 5 )
214
+ assert davies_bouldin_index (np .ones ((10 , 2 )),
215
+ [0 ] * 5 + [1 ] * 5 ) == pytest . approx ( 0.0 )
215
216
216
217
# Assert the value is 0. when all the mean cluster are equal
217
- assert 0. == davies_bouldin_index ([[- 1 , - 1 ], [1 , 1 ]] * 10 ,
218
- [0 ] * 10 + [1 ] * 10 )
218
+ assert davies_bouldin_index ([[- 1 , - 1 ], [1 , 1 ]] * 10 ,
219
+ [0 ] * 10 + [1 ] * 10 ) == pytest . approx ( 0.0 )
219
220
220
221
# General case (with non numpy arrays)
221
222
X = ([[0 , 0 ], [1 , 1 ]] * 5 + [[3 , 3 ], [4 , 4 ]] * 5 +
222
223
[[0 , 4 ], [1 , 3 ]] * 5 + [[3 , 1 ], [4 , 0 ]] * 5 )
223
224
labels = [0 ] * 10 + [1 ] * 10 + [2 ] * 10 + [3 ] * 10
224
- assert_almost_equal (davies_bouldin_index (X , labels ),
225
- 2 * np .sqrt (0.5 ) / 3 )
225
+ assert davies_bouldin_index(X, labels) == pytest.approx(2 * np.sqrt(0.5) / 3)
226
226
227
227
# General case - cluster have one sample
228
228
X = ([[0 , 0 ], [2 , 2 ], [3 , 3 ], [5 , 5 ]])
229
229
labels = [0 , 0 , 1 , 2 ]
230
- assert_almost_equal (davies_bouldin_index (X , labels ),
231
- (5. / 4 ) / 3 )
230
+ assert davies_bouldin_index(X, labels) == pytest.approx((5. / 4) / 3)
0 commit comments