14
14
import numpy as np
15
15
16
16
from scipy .optimize import fmin_bfgs
17
+ from sklearn .preprocessing import LabelEncoder
17
18
18
19
from .base import BaseEstimator , ClassifierMixin , RegressorMixin , clone
19
- from .preprocessing import LabelBinarizer
20
+ from .preprocessing import label_binarize , LabelBinarizer
20
21
from .utils import check_X_y , check_array , indexable , column_or_1d
21
22
from .utils .validation import check_is_fitted
22
23
from .utils .fixes import signature
@@ -50,7 +51,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
50
51
The method to use for calibration. Can be 'sigmoid' which
51
52
corresponds to Platt's method or 'isotonic' which is a
52
53
non-parametric approach. It is not advised to use isotonic calibration
53
- with too few calibration samples ``(<<1000)`` since it tends to overfit.
54
+ with too few calibration samples ``(<<1000)`` since it tends to
55
+ overfit.
54
56
Use sigmoids (Platt's calibration) in this case.
55
57
56
58
cv : integer, cross-validation generator, iterable or "prefit", optional
@@ -63,8 +65,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
63
65
- An iterable yielding train/test splits.
64
66
65
67
For integer/None inputs, if ``y`` is binary or multiclass,
66
- :class:`sklearn.model_selection.StratifiedKFold` is used. If ``y``
67
- is neither binary nor multiclass, :class:`sklearn.model_selection.KFold`
68
+ :class:`sklearn.model_selection.StratifiedKFold` is used. If ``y`` is
69
+ neither binary nor multiclass, :class:`sklearn.model_selection.KFold`
68
70
is used.
69
71
70
72
Refer to the :ref:`User Guide <cross_validation>` for the various
@@ -124,15 +126,16 @@ def fit(self, X, y, sample_weight=None):
124
126
X , y = check_X_y (X , y , accept_sparse = ['csc' , 'csr' , 'coo' ],
125
127
force_all_finite = False )
126
128
X , y = indexable (X , y )
127
- lb = LabelBinarizer ().fit (y )
128
- self .classes_ = lb .classes_
8000
code>
129
+ le = LabelBinarizer ().fit (y )
130
+ self .classes_ = le .classes_
129
131
130
132
# Check that each cross-validation fold can have at least one
131
133
# example per class
132
134
n_folds = self .cv if isinstance (self .cv , int ) \
133
135
else self .cv .n_folds if hasattr (self .cv , "n_folds" ) else None
134
136
if n_folds and \
135
- np .any ([np .sum (y == class_ ) < n_folds for class_ in self .classes_ ]):
137
+ np .any ([np .sum (y == class_ ) < n_folds for class_ in
138
+ self .classes_ ]):
136
139
raise ValueError ("Requesting %d-fold cross-validation but provided"
137
140
" less than %d examples for at least one class."
138
141
% (n_folds , n_folds ))
@@ -175,7 +178,8 @@ def fit(self, X, y, sample_weight=None):
175
178
this_estimator .fit (X [train ], y [train ])
176
179
177
180
calibrated_classifier = _CalibratedClassifier (
178
- this_estimator , method = self .method )
181
+ this_estimator , method = self .method ,
182
+ classes = self .classes_ )
179
183
if sample_weight is not None :
180
184
calibrated_classifier .fit (X [test ], y [test ],
181
185
sample_weight [test ])
@@ -253,6 +257,11 @@ class _CalibratedClassifier(object):
253
257
corresponds to Platt's method or 'isotonic' which is a
254
258
non-parametric approach based on isotonic regression.
255
259
260
+ classes : array-like, shape (n_classes,), optional
261
+ Contains unique classes used to fit the base estimator.
262
+ If None, the classes are extracted from the given target values
263
+ in fit().
264
+
256
265
References
257
266
----------
258
267
.. [1] Obtaining calibrated probability estimates from decision trees
@@ -267,9 +276,10 @@ class _CalibratedClassifier(object):
267
276
.. [4] Predicting Good Probabilities with Supervised Learning,
268
277
A. Niculescu-Mizil & R. Caruana, ICML 2005
269
278
"""
270
- def __init__ (self , base_estimator , method = 'sigmoid' ):
279
+ def __init__ (self , base_estimator , method = 'sigmoid' , classes = None ):
271
280
self .base_estimator = base_estimator
272
281
self .method = method
282
+ self .classes = classes
273
283
274
284
def _preproc (self , X ):
275
285
n_classes = len (self .classes_ )
@@ -285,7 +295,8 @@ def _preproc(self, X):
285
295
raise RuntimeError ('classifier has no decision_function or '
286
296
'predict_proba method.' )
287
297
288
- idx_pos_class = np .arange (df .shape [1 ])
298
+ idx_pos_class = self .label_encoder_ .\
299
+ transform (self .base_estimator .classes_ )
289
300
290
301
return df , idx_pos_class
291
302
@@ -308,9 +319,15 @@ def fit(self, X, y, sample_weight=None):
308
319
self : object
309
320
Returns an instance of self.
310
321
"""
311
- lb = LabelBinarizer ()
312
- Y = lb .fit_transform (y )
313
- self .classes_ = lb .classes_
322
+
323
+ self .label_encoder_ = LabelEncoder ()
324
+ if self .classes is None :
325
+ self .label_encoder_ .fit (y )
326
+ else :
327
+ self .label_encoder_ .fit (self .classes )
328
+
329
+ self .classes_ = self .label_encoder_ .classes_
330
+ Y = label_binarize (y , self .classes_ )
314
331
315
332
df , idx_pos_class = self ._preproc (X )
316
333
self .calibrators_ = []
0 commit comments