15
15
# License: BSD 3 clause
16
16
17
17
import numpy as np
18
+ import copy
18
19
19
20
from abc import ABCMeta
20
21
from .base import BaseEstimator , clone
21
22
from .base import RegressorMixin , ClassifierMixin
22
23
from .utils import check_array , check_X_y
23
24
from .utils .fixes import parallel_helper
24
25
from .utils .validation import check_is_fitted , has_fit_parameter
26
+ from .utils .metaestimators import if_delegate_has_method
25
27
from .externals .joblib import Parallel , delayed
26
28
from .externals import six
27
29
@@ -37,12 +39,87 @@ def _fit_estimator(estimator, X, y, sample_weight=None):
37
39
return estimator
38
40
39
41
42
+ def _partial_fit_estimator (estimator , X , y , classes = None , sample_weight = None ,
43
+ first_time = True ):
44
+ if first_time :
45
+ estimator = clone (estimator )
46
+ else :
47
+ estimator = copy .copy (estimator )
48
+
49
+ if sample_weight is not None :
50
+ if classes is not None :
51
+ estimator .partial_fit (X , y , classes = classes ,
52
+ sample_weight = sample_weight )
53
+ else :
54
+ estimator .partial_fit (X , y , sample_weight = sample_weight )
55
+ else :
56
+ if classes is not None :
57
+ estimator .partial_fit (X , y , classes = classes )
58
+ else :
59
+ estimator .partial_fit (X , y )
60
+ return estimator
61
+
62
+
40
63
class MultiOutputEstimator (six .with_metaclass (ABCMeta , BaseEstimator )):
41
64
42
65
def __init__(self, estimator, n_jobs=1):
    """Store the base estimator template and the parallelism setting.

    Parameters are kept as-is (no validation here), per the scikit-learn
    convention that ``__init__`` only records constructor arguments.
    """
    self.estimator = estimator
    self.n_jobs = n_jobs
45
68
69
@if_delegate_has_method('estimator')
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Incrementally fit the model to data.

    Fit a separate model for each output variable.

    Parameters
    ----------
    X : (sparse) array-like, shape (n_samples, n_features)
        Data.

    y : (sparse) array-like, shape (n_samples, n_outputs)
        Multi-output targets. An indicator matrix turns on multilabel
        estimation.

    classes : array, shape (n_classes, n_outputs)
        Classes across all calls to partial_fit.
        Can be obtained via ``[np.unique(y[:, i]) for i in
        range(y.shape[1])]``, where y is the target matrix of the
        entire dataset.
        This argument is required for the first call to partial_fit
        and can be omitted in the subsequent calls.
        Note that y doesn't need to contain all labels in `classes`.

    sample_weight : array-like, shape = (n_samples) or None
        Sample weights. If None, then samples are equally weighted.
        Only supported if the underlying regressor supports sample
        weights.

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y,
                     multi_output=True,
                     accept_sparse=True)

    if y.ndim == 1:
        raise ValueError("y must have at least two dimensions for "
                         "multi target regression but has only one.")

    if (sample_weight is not None and
            not has_fit_parameter(self.estimator, 'sample_weight')):
        raise ValueError("Underlying regressor does not support"
                         " sample weights.")

    # First call: no fitted estimators exist yet, so clone the template;
    # later calls update the already-fitted per-output estimators.
    first_time = not hasattr(self, 'estimators_')

    # One estimator per output column, (partially) fitted in parallel.
    # NOTE: `range` (not the Python-2-only `xrange`) keeps this file
    # runnable on Python 3, consistent with the six-based compat used
    # elsewhere in the module.
    self.estimators_ = Parallel(n_jobs=self.n_jobs)(
        delayed(_partial_fit_estimator)(
            self.estimators_[i] if not first_time else self.estimator,
            X, y[:, i],
            classes[:, i] if classes is not None else None,
            sample_weight, first_time)
        for i in range(y.shape[1]))
    return self
122
+
46
123
def fit (self , X , y , sample_weight = None ):
47
124
""" Fit the model to data.
48
125
Fit a separate model for each output variable.
@@ -68,7 +145,8 @@ def fit(self, X, y, sample_weight=None):
68
145
"""
69
146
70
147
if not hasattr (self .estimator , "fit" ):
71
- raise ValueError ("The base estimator should implement a fit method" )
148
+ raise ValueError (
149
+ "The base estimator should implement a fit method" )
72
150
73
151
X , y = check_X_y (X , y ,
74
152
multi_output = True ,
@@ -84,7 +162,7 @@ def fit(self, X, y, sample_weight=None):
84
162
" sample weights." )
85
163
86
164
self .estimators_ = Parallel (n_jobs = self .n_jobs )(delayed (_fit_estimator )(
87
- self .estimator , X , y [:, i ], sample_weight ) for i in range (y .shape [1 ]))
165
+ self .estimator , X , y [:, i ], sample_weight ) for i in xrange (y .shape [1 ]))
88
166
return self
89
167
90
168
def predict (self , X ):
@@ -104,7 +182,8 @@ def predict(self, X):
104
182
"""
105
183
check_is_fitted (self , 'estimators_' )
106
184
if not hasattr (self .estimator , "predict" ):
107
- raise ValueError ("The base estimator should implement a predict method" )
185
+ raise ValueError (
186
+ "The base estimator should implement a predict method" )
108
187
109
188
X = check_array (X , accept_sparse = True )
110
189
@@ -133,9 +212,36 @@ class MultiOutputRegressor(MultiOutputEstimator, RegressorMixin):
133
212
using `n_jobs>1` can result in slower performance due
134
213
to the overhead of spawning processes.
135
214
"""
215
+
136
216
def __init__(self, estimator, n_jobs=1):
    """Delegate construction to the shared multi-output base class."""
    super(MultiOutputRegressor, self).__init__(estimator, n_jobs)
138
218
219
def partial_fit(self, X, y, sample_weight=None):
    """Incrementally fit the model to data.

    Fit a separate model for each output variable.

    Parameters
    ----------
    X : (sparse) array-like, shape (n_samples, n_features)
        Data.

    y : (sparse) array-like, shape (n_samples, n_outputs)
        Multi-output targets. An indicator matrix turns on multilabel
        estimation.

    sample_weight : array-like, shape = (n_samples) or None
        Sample weights. If None, then samples are equally weighted.
        Only supported if the underlying regressor supports sample
        weights.

    Returns
    -------
    self : object
        Returns self.
    """
    # Propagate the base-class return value: the parent partial_fit
    # returns `self`, and dropping it (as the original did) makes this
    # method return None despite the documented contract above.
    return super(MultiOutputRegressor, self).partial_fit(
        X, y, sample_weight=sample_weight)
244
+
139
245
def score (self , X , y , sample_weight = None ):
140
246
"""Returns the coefficient of determination R^2 of the prediction.
141
247
@@ -223,7 +329,7 @@ def predict_proba(self, X):
223
329
"predict_proba method" )
224
330
225
331
results = np .dstack ([estimator .predict_proba (X ) for estimator in
226
- self .estimators_ ])
332
+ self .estimators_ ])
227
333
return results
228
334
229
335
def score (self , X , y ):
0 commit comments