@@ -9,18 +9,20 @@
 
 from __future__ import division
 import inspect
+import warnings
 
 from math import log
 import numpy as np
 
 from scipy.optimize import fmin_bfgs
 
 from .base import BaseEstimator, ClassifierMixin, RegressorMixin, clone
 from .preprocessing import LabelBinarizer
+from .utils import check_random_state
 from .utils import check_X_y, check_array, indexable, column_or_1d
 from .utils.validation import check_is_fitted
 from .isotonic import IsotonicRegression
-from .naive_bayes import GaussianNB
+from .svm import LinearSVC
 from .cross_validation import _check_cv
 from .metrics.classification import _check_binary_probabilistic_predictions
 
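For context on the new import: `check_random_state` is scikit-learn's standard normalizer for `random_state` arguments. A minimal sketch of its contract (my summary, not part of the diff):

    import numpy as np
    from sklearn.utils import check_random_state

    assert isinstance(check_random_state(0), np.random.RandomState)     # int -> seeded RNG
    assert isinstance(check_random_state(None), np.random.RandomState)  # None -> global RNG
    rng = np.random.RandomState(42)
    assert check_random_state(rng) is rng  # an existing instance passes through unchanged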
@@ -57,6 +59,9 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
         If "prefit" is passed, it is assumed that base_estimator has been
         fitted already and all data is used for calibration.
 
+    random_state : int, RandomState instance or None (default=None)
+        Used to randomly break ties when method is 'isotonic'.
+
     Attributes
     ----------
     classes_ : array, shape (n_classes)
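The tie-breaking mentioned in the new docstring refers to the jitter added before isotonic regression (see the last hunk below). A toy illustration of why tied decision values need it, using `IsotonicRegression` directly (illustrative, not part of the diff):

    import numpy as np
    from sklearn.isotonic import IsotonicRegression

    rng = np.random.RandomState(0)
    df = np.array([0.1, 0.1, 0.1, 0.7])  # tied decision values
    y = np.array([0., 1., 0., 1.])

    # Isotonic regression sorts its inputs; exact ties can sort in any order,
    # so a tiny perturbation makes the ordering (and the fit) deterministic.
    jitter = rng.normal(0, 1e-10, df.shape[0])
    ir = IsotonicRegression(out_of_bounds='clip').fit(df + jitter, y)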
@@ -81,10 +86,12 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
     """
-    def __init__(self, base_estimator=GaussianNB(), method='sigmoid', cv=3):
+    def __init__(self, base_estimator=None, method='sigmoid', cv=3,
+                 random_state=None):
         self.base_estimator = base_estimator
         self.method = method
         self.cv = cv
+        self.random_state = random_state
 
     def fit(self, X, y, sample_weight=None):
         """Fit the calibrated model
@@ -109,6 +116,7 @@ def fit(self, X, y, sample_weight=None):
         X, y = indexable(X, y)
         lb = LabelBinarizer().fit(y)
         self.classes_ = lb.classes_
+        random_state = check_random_state(self.random_state)
 
         # Check that each cross-validation fold can have at least one
         # example per class
@@ -121,28 +129,43 @@ def fit(self, X, y, sample_weight=None):
                              % (n_folds, n_folds))
 
         self.calibrated_classifiers_ = []
+        if self.base_estimator is None:
+            base_estimator = LinearSVC()
+        else:
+            base_estimator = self.base_estimator
+
         if self.cv == "prefit":
-            calibrated_classifier = _CalibratedClassifier(self.base_estimator,
-                                                          method=self.method)
+            calibrated_classifier = _CalibratedClassifier(
+                base_estimator, method=self.method, random_state=random_state)
             if sample_weight is not None:
                 calibrated_classifier.fit(X, y, sample_weight)
             else:
                 calibrated_classifier.fit(X, y)
             self.calibrated_classifiers_.append(calibrated_classifier)
         else:
             cv = _check_cv(self.cv, X, y, classifier=True)
+            arg_names = inspect.getargspec(base_estimator.fit)[0]
+            estimator_name = type(base_estimator).__name__
+            if (sample_weight is not None
+                    and "sample_weight" not in arg_names):
+                warnings.warn("%s does not support sample_weight. Sample"
+                              " weights are only used for the calibration"
+                              " itself." % estimator_name)
+                base_estimator_sample_weight = None
+            else:
+                base_estimator_sample_weight = sample_weight
             for train, test in cv:
-                this_estimator = clone(self.base_estimator)
-                if sample_weight is not None and \
-                        "sample_weight" in inspect.getargspec(
-                            this_estimator.fit)[0]:
-                    this_estimator.fit(X[train], y[train],
-                                       sample_weight[train])
+                this_estimator = clone(base_estimator)
+                if base_estimator_sample_weight is not None:
+                    this_estimator.fit(
+                        X[train], y[train],
+                        sample_weight=base_estimator_sample_weight[train])
                 else:
                     this_estimator.fit(X[train], y[train])
 
-                calibrated_classifier = \
-                    _CalibratedClassifier(this_estimator, method=self.method)
+                calibrated_classifier = _CalibratedClassifier(
+                    this_estimator, method=self.method,
+                    random_state=random_state)
                 if sample_weight is not None:
                     calibrated_classifier.fit(X[test], y[test],
                                               sample_weight[test])
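One behavioral consequence worth spelling out: when the base estimator's `fit` does not accept `sample_weight`, the weights now apply only to the calibrators and a warning is raised, instead of the weights being silently dropped. A sketch of how that surfaces (assuming, as at the time of this diff, that `LinearSVC.fit` takes no `sample_weight`):

    import warnings
    import numpy as np
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=100, random_state=0)
    weights = np.ones_like(y, dtype=float)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # The default base estimator (LinearSVC) is fit without weights;
        # only the per-fold calibrators receive sample_weight.
        CalibratedClassifierCV().fit(X, y, sample_weight=weights)
    print(caught[0].message)  # "LinearSVC does not support sample_weight. ..."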
@@ -219,6 +242,9 @@ class _CalibratedClassifier(object):
         corresponds to Platt's method or 'isotonic' which is a
         non-parametric approach based on isotonic regression.
 
+    random_state : int, RandomState instance or None (default=None)
+        Used to randomly break ties when method is 'isotonic'.
+
     References
     ----------
     .. [1] Obtaining calibrated probability estimates from decision trees
@@ -233,9 +259,11 @@ class _CalibratedClassifier(object):
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
     """
-    def __init__(self, base_estimator, method='sigmoid'):
+    def __init__(self, base_estimator, method='sigmoid',
+                 random_state=None):
         self.base_estimator = base_estimator
         self.method = method
+        self.random_state = random_state
 
     def _preproc(self, X):
         n_classes = len(self.classes_)
@@ -289,8 +317,8 @@ def fit(self, X, y, sample_weight=None):
                 # have different outputs. Since this is not atypical
                 # when calibrating, we add some small random jitter to
                 # the inputs.
-                this_df = \
-                    this_df + np.random.normal(0, 1e-10, this_df.shape[0])
+                jitter = self.random_state.normal(0, 1e-10, this_df.shape[0])
+                this_df = this_df + jitter
             elif self.method == 'sigmoid':
                 calibrator = _SigmoidCalibration()
             else:
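Net effect of the jitter change: the tie-breaking noise is drawn from the estimator's own `RandomState` rather than the global NumPy RNG, so a fixed seed makes isotonic calibration reproducible. A quick check, under the same illustrative setup as above:

    import numpy as np
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.svm import LinearSVC
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=200, random_state=7)

    def fit_proba():
        clf = CalibratedClassifierCV(LinearSVC(random_state=0),
                                     method='isotonic', random_state=0)
        return clf.fit(X, y).predict_proba(X)

    # Same seed, same jitter, identical calibrated probabilities.
    assert np.allclose(fit_proba(), fit_proba())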