@@ -3,15 +3,15 @@
 """
 import warnings
 import numpy as np
-from nose.tools import assert_raises
+from nose.tools import assert_raises, assert_equal
 from numpy.testing import assert_array_equal

 from sklearn.utils.testing import all_estimators
 from sklearn.utils.testing import assert_greater
 from sklearn.base import clone, ClassifierMixin, RegressorMixin
 from sklearn.utils import shuffle
 from sklearn.preprocessing import Scaler
-# from sklearn.datasets import load_digits
+from sklearn.cross_validation import train_test_split
 from sklearn.datasets import load_iris, load_boston
 from sklearn.metrics import zero_one_score
 from sklearn.lda import LDA
@@ -26,7 +26,6 @@
                               OutputCodeClassifier
 from sklearn.feature_selection import RFE, RFECV
 from sklearn.naive_bayes import MultinomialNB, BernoulliNB
-from sklearn.linear_model import RidgeClassifier, RidgeClassifierCV

 dont_test = [Pipeline, GridSearchCV, SparseCoder]
 meta_estimators = [BaseEnsemble, OneVsOneClassifier, OutputCodeClassifier,
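Note on the swapped import above: `load_digits` goes away and `train_test_split` from the then-current `sklearn.cross_validation` module comes in; it is used by the new `test_classifiers_test` below to carve out a held-out set. A minimal standalone sketch of the call pattern (assuming the 0.x-era API this file targets, where the default split holds out roughly a quarter of the samples):

    import numpy as np
    from sklearn.cross_validation import train_test_split

    X = np.arange(20).reshape(10, 2)
    y = np.arange(10)
    # one call returns train/test views of every array passed in
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    print(X_train.shape, X_test.shape)  # e.g. (7, 2) and (3, 2)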
@@ -55,15 +54,17 @@ def test_all_estimators():
         print(w)


-def test_classifiers():
+def test_classifiers_train():
+    # test if classifiers do something sensible on training set
+    # also test all shapes / shape errors
     estimators = all_estimators()
     classifiers = [(name, E) for name, E in estimators if issubclass(E,
                    ClassifierMixin)]
     iris = load_iris()
     X, y = iris.data, iris.target
     X, y = shuffle(X, y, random_state=7)
-    #digits = load_digits()
-    #X, y = digits.data, digits.target
+    n_samples, n_features = X.shape
+    n_labels = len(np.unique(y))
     X = Scaler().fit_transform(X)
     for name, Clf in classifiers:
         if Clf in dont_test or Clf in meta_estimators:
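For context, the new `n_samples`, `n_features`, and `n_labels` bookkeeping is pinned down entirely by the iris data used here: 150 samples, 4 features, 3 classes. A quick standalone check:

    import numpy as np
    from sklearn.datasets import load_iris

    iris = load_iris()
    X, y = iris.data, iris.target
    n_samples, n_features = X.shape   # (150, 4)
    n_labels = len(np.unique(y))      # 3 distinct classes: 0, 1, 2
    print(n_samples, n_features, n_labels)  # 150 4 3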
@@ -75,6 +76,7 @@ def test_classifiers():
         # fit
         clf.fit(X, y)
         y_pred = clf.predict(X)
+        assert_equal(y_pred.shape, (n_samples,))
         # training set performance
         assert_greater(zero_one_score(y, y_pred), 0.78)
         # raises error on malformed input for predict
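The `assert_equal(y_pred.shape, (n_samples,))` added here, together with the `decision_function` and `predict_proba` checks in the next hunk, pins down the output-shape convention the suite now enforces: `predict` returns one label per sample, while the continuous outputs return one column per class and must agree with `predict` under `argmax`. A minimal sketch of that contract with one concrete estimator (LDA, imported at the top of the file; any conforming classifier would do, under the era API assumed throughout this diff):

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.lda import LDA

    X, y = load_iris().data, load_iris().target
    clf = LDA().fit(X, y)
    y_pred = clf.predict(X)              # shape (n_samples,)
    decision = clf.decision_function(X)  # shape (n_samples, n_labels)
    assert y_pred.shape == (X.shape[0],)
    assert decision.shape == (X.shape[0], len(np.unique(y)))
    # argmax over the class columns must reproduce predict
    assert np.all(np.argmax(decision, axis=1) == y_pred)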
@@ -84,24 +86,82 @@ def test_classifiers():
         assert_raises(ValueError, clf.predict, X.T)
         if hasattr(clf, "decision_function"):
             try:
-                #raises error on malformed input for decision_function
+                # raises error on malformed input for decision_function
                 assert_raises(ValueError, clf.decision_function, X.T)
-                #decision_function agrees with predict:
+                # decision_function agrees with predict:
                 decision = clf.decision_function(X)
+                assert_equal(decision.shape, (n_samples, n_labels))
                 assert_array_equal(np.argmax(decision, axis=1), y_pred)
             except NotImplementedError:
                 pass
         if hasattr(clf, "predict_proba"):
             try:
+                # raises error on malformed input for predict_proba
                 assert_raises(ValueError, clf.predict_proba, X.T)
-                # decision_function agrees with predict:
+                # predict_proba agrees with predict:
                 y_prob = clf.predict_proba(X)
+                assert_equal(y_prob.shape, (n_samples, n_labels))
                 assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
             except NotImplementedError:
                 pass


-def test_regressors():
+def test_classifiers_classes():
+    # test if classifiers can cope with non-consecutive classes
+    estimators = all_estimators()
+    classifiers = [(name, E) for name, E in estimators if issubclass(E,
+                   ClassifierMixin)]
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    X, y = shuffle(X, y, random_state=7)
+    X = Scaler().fit_transform(X)
+    y = 2 * y + 1
+    # TODO: make work with next line :)
+    #y = y.astype(np.str)
+    for name, Clf in classifiers:
+        if Clf in dont_test or Clf in meta_estimators:
+            continue
+        if Clf in [MultinomialNB, BernoulliNB]:
+            # TODO also test these!
+            continue
+        clf = Clf()
+        # fit
+        clf.fit(X, y)
+        y_pred = clf.predict(X)
+        # training set performance
+        assert_array_equal(np.unique(y), np.unique(y_pred))
+        assert_greater(zero_one_score(y, y_pred), 0.78)
+
+
+def test_classifiers_test():
+    # test if classifiers do something sensible on a held-out test set
+    estimators = all_estimators()
+    classifiers = [(name, E) for name, E in estimators if issubclass(E,
+                   ClassifierMixin)]
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    X, y = shuffle(X, y, random_state=7)
+    X = Scaler().fit_transform(X)
+    X_train, X_test, y_train, y_test = train_test_split(X, y)
+    for name, Clf in classifiers:
+        if Clf in dont_test or Clf in meta_estimators:
+            continue
+        if Clf in [MultinomialNB, BernoulliNB]:
+            # TODO also test these!
+            continue
+        clf = Clf()
+        # fit
+        try:
+            clf.fit(X_train, y_train)
+            y_pred = clf.predict(X_test)
+            # test set performance
+            assert_greater(zero_one_score(y_test, y_pred), 0.78)
+        except Exception as ex:
+            print(ex)
+            print(clf)
+
+
+def test_regressors_train():
     estimators = all_estimators()
     regressors = [(name, E) for name, E in estimators if issubclass(E,
                   RegressorMixin)]
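Worth spelling out what makes `test_classifiers_classes` above bite: `y = 2 * y + 1` remaps the consecutive iris labels {0, 1, 2} to {1, 3, 5}, a label set that is neither zero-based nor contiguous, so any estimator that silently assumes `range(n_classes)` labels will fail the `np.unique` comparison. A standalone illustration:

    import numpy as np

    y = np.array([0, 1, 2, 2, 1, 0])
    print(np.unique(2 * y + 1))  # [1 3 5]: non-consecutive class labels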
@@ -115,9 +175,6 @@ def test_regressors():
     for name, Reg in regressors:
         if Reg in dont_test or Reg in meta_estimators:
             continue
-        if Reg in [RidgeClassifier, RidgeClassifierCV]:
-            #TODO this is not a regressor!
-            continue
         reg = Reg()
         if hasattr(reg, 'alpha'):
             reg.set_params(alpha=0.01)