@@ -28,10 +28,17 @@ class DummyClassifier(BaseEstimator, ClassifierMixin):
28
28
* "most_frequent": always predicts the most frequent label in the
29
29
training set.
30
30
* "uniform": generates predictions uniformly at random.
31
+ * "constant": always predicts a constant label that is provided by
32
+ the user. This is useful for metrics that evaluate a non-majority
33
+ class.
31
34
32
35
random_state: int seed, RandomState instance, or None (default)
33
36
The seed of the pseudo random number generator to use.
34
37
38
+ constant: int or str or array of shape = [n_outputs]
39
+ The explicit constant as predicted by the "constant" strategy. This
40
+ parameter is useful only for the "constant" strategy.
41
+
35
42
Attributes
36
43
----------
37
44
`classes_` : array or list of array of shape = [n_classes]
@@ -48,11 +55,14 @@ class DummyClassifier(BaseEstimator, ClassifierMixin):
48
55
49
56
`output_2d_` : bool,
50
57
True if the output at fit is 2d, else False.
58
+
51
59
"""
52
60
53
- def __init__ (self , strategy = "stratified" , random_state = None ):
61
+ def __init__ (self , strategy = "stratified" , random_state = None ,
62
+ constant = None ):
54
63
self .strategy = strategy
55
64
self .random_state = random_state
65
+ self .constant = constant
56
66
57
67
def fit (self , X , y ):
58
68
"""Fit the random classifier.
@@ -71,7 +81,8 @@ def fit(self, X, y):
71
81
self : object
72
82
Returns self.
73
83
"""
74
- if self .strategy not in ("most_frequent" , "stratified" , "uniform" ):
84
+ if self .strategy not in ("most_frequent" , "stratified" , "uniform" ,
85
+ "constant" ):
75
86
raise ValueError ("Unknown strategy type." )
76
87
77
88
y = np .atleast_1d (y )
@@ -85,12 +96,29 @@ def fit(self, X, y):
85
96
self .n_classes_ = []
86
97
self .class_prior_ = []
87
98
99
+ if self .strategy == "constant" :
100
+ if self .constant is None :
101
+ raise ValueError ("Constant target value has to be specified "
102
+ "when the constant strategy is used." )
103
+ else :
104
+ constant = np .reshape (np .atleast_1d (self .constant ), (- 1 , 1 ))
105
+ if constant .shape [0 ] != self .n_outputs_ :
106
+ raise ValueError ("Constant target value should have "
107
+ "shape (%d, 1)." % self .n_outputs_ )
108
+
88
109
for k in xrange (self .n_outputs_ ):
89
110
classes , y_k = unique (y [:, k ], return_inverse = True )
90
111
self .classes_ .append (classes )
91
112
self .n_classes_ .append (classes .shape [0 ])
92
113
self .class_prior_ .append (np .bincount (y_k ) / float (y_k .shape [0 ]))
93
114
115
+ # Checking in case of constant strategy if the constant provided
116
+ # by the user is in y.
117
+ if self .strategy == "constant" :
118
+ if constant [k ] not in self .classes_ [k ]:
119
+ raise ValueError ("The constant target value must be "
120
+ "present in training data" )
121
+
94
122
if self .n_outputs_ == 1 and not self .output_2d_ :
95
123
self .n_classes_ = self .n_classes_ [0 ]
96
124
self .classes_ = self .classes_ [0 ]
@@ -123,12 +151,13 @@ def predict(self, X):
123
151
n_classes_ = self .n_classes_
124
152
classes_ = self .classes_
125
153
class_prior_ = self .class_prior_
154
+ constant = self .constant
126
155
if self .n_outputs_ == 1 :
127
156
# Get same type even for self.n_outputs_ == 1
128
157
n_classes_ = [n_classes_ ]
129
158
classes_ = [classes_ ]
130
159
class_prior_ = [class_prior_ ]
131
-
160
+ constant = [ constant ]
132
161
# Compute probability only once
133
162
if self .strategy == "stratified" :
134
163
proba = self .predict_proba (X )
@@ -146,6 +175,10 @@ def predict(self, X):
146
175
elif self .strategy == "uniform" :
147
176
ret = rs .randint (n_classes_ [k ], size = n_samples )
148
177
178
+ elif self .strategy == "constant" :
179
+ ret = np .ones (n_samples , dtype = int ) * (
180
+ np .where (classes_ [k ] == constant [k ]))
181
+
149
182
y .append (classes_ [k ][ret ])
150
183
151
184
y = np .vstack (y ).T
@@ -181,11 +214,13 @@ def predict_proba(self, X):
181
214
n_classes_ = self .n_classes_
182
215
classes_ = self .classes_
183
216
class_prior_ = self .class_prior_
217
+ constant = self .constant
184
218
if self .n_outputs_ == 1 and not self .output_2d_ :
185
219
# Get same type even for self.n_outputs_ == 1
186
220
n_classes_ = [n_classes_ ]
187
221
classes_ = [classes_ ]
188
222
class_prior_ = [class_prior_ ]
223
+ constant = [constant ]
189
224
190
225
P = []
191
226
for k in xrange (self .n_outputs_ ):
@@ -201,6 +236,11 @@ def predict_proba(self, X):
201
236
out = np .ones ((n_samples , n_classes_ [k ]), dtype = np .float64 )
202
237
out /= n_classes_ [k ]
203
238
239
+ elif self .strategy == "constant" :
240
+ ind = np .where (classes_ [k ] == constant [k ])
241
+ out = np .zeros ((n_samples , n_classes_ [k ]), dtype = np .float64 )
242
+ out [:, ind ] = 1.0
243
+
204
244
P .append (out )
205
245
206
246
if self .n_outputs_ == 1 and not self .output_2d_ :
0 commit comments