1
1
import numpy as np
2
2
3
+ from sklearn .linear_model import LogisticRegression
4
+ from sklearn .datasets import make_blobs
5
+
3
6
from sklearn .utils .class_weight import compute_class_weight
4
7
from sklearn .utils .class_weight import compute_sample_weight
5
8
@@ -26,6 +29,27 @@ def test_compute_class_weight_not_present():
26
29
assert_raises (ValueError , compute_class_weight , "auto" , classes , y )
27
30
28
31
32
def test_compute_class_weight_invariance():
    # Check that fitting with class_weight="auto" gives the same model
    # regardless of class imbalance, provided the total number of samples
    # is identical across the datasets being compared.
    X, y = make_blobs(centers=2, random_state=0)
    is_one = y == 1
    is_zero = y == 0

    # Variant with class 1 over-represented: the original data plus two
    # extra copies of every class-1 sample.
    X_extra_ones = np.vstack([X, X[is_one], X[is_one]])
    y_extra_ones = np.hstack([y, y[is_one], y[is_one]])

    # Variant with class 0 over-represented: the original data plus two
    # extra copies of every class-0 sample.
    X_extra_zeros = np.vstack([X, X[is_zero], X[is_zero]])
    y_extra_zeros = np.hstack([y, y[is_zero], y[is_zero]])

    # Variant where every sample (of both classes) is duplicated.
    X_doubled = np.vstack([X, X])
    y_doubled = np.hstack([y, y])

    # Fit one model per variant and keep only the learned coefficients.
    coef_extra_ones = LogisticRegression(class_weight="auto").fit(
        X_extra_ones, y_extra_ones).coef_
    coef_extra_zeros = LogisticRegression(class_weight="auto").fit(
        X_extra_zeros, y_extra_zeros).coef_
    coef_doubled = LogisticRegression(class_weight="auto").fit(
        X_doubled, y_doubled).coef_

    # All three models are expected to agree.
    assert_array_almost_equal(coef_extra_ones, coef_extra_zeros)
    assert_array_almost_equal(coef_doubled, coef_extra_zeros)
51
+
52
+
29
53
def test_compute_class_weight_auto_negative ():
30
54
"""Test compute_class_weight when labels are negative"""
31
55
# Test with balanced class labels.
@@ -116,7 +140,7 @@ def test_compute_sample_weight_with_subsample():
116
140
# Test with a bootstrap subsample
117
141
y = np .asarray ([1 , 1 , 1 , 2 , 2 , 2 ])
118
142
sample_weight = compute_sample_weight ("auto" , y , [0 , 1 , 1 , 2 , 2 , 3 ])
119
- expected = np .asarray ([1 / 3. , 1 / 3. , 1 / 3. , 5 / 3. , 5 / 3. , 5 / 3. ])
143
+ expected = np .asarray ([1 / 3. , 1 / 3. , 1 / 3. , 5 / 3. , 5 / 3. , 5 / 3. ])
120
144
assert_array_almost_equal (sample_weight , expected )
121
145
122
146
# Test with a bootstrap subsample for multi-output
0 commit comments