1
+ from __future__ import division
1
2
import numpy as np
2
3
import scipy .sparse as sp
3
4
from sklearn .utils import shuffle
4
5
from sklearn .utils .testing import assert_almost_equal
5
6
from sklearn .utils .testing import assert_raises
7
+ from sklearn .utils .testing import assert_false
6
8
from sklearn .utils .testing import assert_raises_regex
7
9
from sklearn .utils .testing import assert_array_equal
8
10
from sklearn .utils .testing import assert_equal
11
+ from sklearn .utils .testing import assert_not_equal
12
+ from sklearn .utils .testing import assert_array_almost_equal
9
13
from sklearn .exceptions import NotFittedError
10
14
from sklearn import datasets
11
15
from sklearn .base import clone
12
16
from sklearn .ensemble import GradientBoostingRegressor , RandomForestClassifier
13
- from sklearn .linear_model import Lasso , LogisticRegression
17
+ from sklearn .linear_model import Lasso
18
+ from sklearn .linear_model import SGDClassifier
19
+ from sklearn .linear_model import SGDRegressor
20
+ from sklearn .linear_model import LogisticRegression
14
21
from sklearn .svm import LinearSVC
15
22
from sklearn .multiclass import OneVsRestClassifier
16
23
from sklearn .multioutput import MultiOutputRegressor , MultiOutputClassifier
@@ -25,7 +32,7 @@ def test_multi_target_regression():
25
32
for n in range (3 ):
26
33
rgr = GradientBoostingRegressor (random_state = 0 )
27
34
rgr .fit (X_train , y_train [:, n ])
28
- references [:,n ] = rgr .predict (X_test )
35
+ references [:, n ] = rgr .predict (X_test )
29
36
30
37
rgr = MultiOutputRegressor (GradientBoostingRegressor (random_state = 0 ))
31
38
rgr .fit (X_train , y_train )
@@ -34,20 +41,40 @@ def test_multi_target_regression():
34
41
assert_almost_equal (references , y_pred )
35
42
36
43
44
def test_multi_target_regression_partial_fit():
    # MultiOutputRegressor.partial_fit must produce the same predictions as
    # incrementally fitting one SGDRegressor per target column by hand.
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]  # removed unused y_test: only predictions are compared

    references = np.zeros_like(y[50:])
    half_index = 25
    for n in range(3):
        # per-column reference: two partial_fit calls on the same halves
        sgr = SGDRegressor(random_state=0)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0))

    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])

    y_pred = sgr.predict(X_test)
    assert_almost_equal(references, y_pred)
64
+
65
+
37
66
def test_multi_target_regression_one_target():
    # A single-target y is not a valid multi-output problem: fit must raise.
    X, y = datasets.make_regression(n_targets=1)

    base_estimator = GradientBoostingRegressor(random_state=0)
    rgr = MultiOutputRegressor(base_estimator)
    assert_raises(ValueError, rgr.fit, X, y)
45
72
46
73
47
74
def test_multi_target_sparse_regression ():
48
75
X , y = datasets .make_regression (n_targets = 3 )
49
76
X_train , y_train = X [:50 ], y [:50 ]
50
- X_test , y_test = X [ 50 :], y [50 :]
77
+ X_test = X [50 :]
51
78
52
79
for sparse in [sp .csr_matrix , sp .csc_matrix , sp .coo_matrix , sp .dok_matrix ,
53
80
sp .lil_matrix ]:
@@ -57,11 +84,12 @@ def test_multi_target_sparse_regression():
57
84
rgr .fit (X_train , y_train )
58
85
rgr_sparse .fit (sparse (X_train ), y_train )
59
86
60
- assert_almost_equal (rgr .predict (X_test ), rgr_sparse .predict (sparse (X_test )))
87
+ assert_almost_equal (rgr .predict (X_test ),
88
+ rgr_sparse .predict (sparse (X_test )))
61
89
62
90
63
91
def test_multi_target_sample_weights_api ():
64
- X = [[1 ,2 , 3 ], [4 ,5 , 6 ]]
92
+ X = [[1 , 2 , 3 ], [4 , 5 , 6 ]]
65
93
y = [[3.141 , 2.718 ], [2.718 , 3.141 ]]
66
94
w = [0.8 , 0.6 ]
67
95
@@ -74,23 +102,40 @@ def test_multi_target_sample_weights_api():
74
102
rgr .fit (X , y , w )
75
103
76
104
105
def test_multi_target_sample_weight_partial_fit():
    # partial_fit must honour sample_weight: two regressors trained with
    # different weights should not make identical predictions.
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]

    # weighted regressor
    rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr_w.partial_fit(X, y, [2., 1.])

    # weighted with different weights
    rgr = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr.partial_fit(X, y, [2., 2.])

    assert_not_equal(rgr.predict(X)[0][0], rgr_w.predict(X)[0][0])
119
+
120
+
77
121
def test_multi_target_sample_weights():
    # A sample weight of 2 must be equivalent to duplicating that sample
    # in an unweighted fit.

    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    weights = [2., 1.]
    rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_w.fit(Xw, yw, weights)

    # unweighted regressor, first sample repeated instead of weighted
    X_repeated = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y_repeated = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_repeated, y_repeated)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr.predict(X_test), rgr_w.predict(X_test))
93
137
138
+
94
139
# Import the data
95
140
iris = datasets .load_iris ()
96
141
# create a multiple targets by randomized shuffling and concatenating y.
@@ -102,6 +147,57 @@ def test_multi_target_sample_weights():
102
147
n_samples , n_features = X .shape
103
148
n_outputs = y .shape [1 ]
104
149
n_classes = len (np .unique (y1 ))
150
+ classes = list (map (np .unique , (y1 , y2 , y3 )))
151
+
152
+
153
def test_multi_output_classification_partial_fit_parallelism():
    # With n_jobs=-1, each partial_fit round must replace the fitted
    # estimators rather than mutate the previous ones in place.
    base_clf = SGDClassifier(loss='log', random_state=1)
    mor = MultiOutputClassifier(base_clf, n_jobs=-1)
    mor.partial_fit(X, y, classes)
    first_est = mor.estimators_[0]
    mor.partial_fit(X, y)
    second_est = mor.estimators_[0]
    # parallelism requires this to be the case for a sane implementation
    assert_false(first_est is second_est)
162
+
163
+
164
def test_multi_output_classification_partial_fit():
    # MultiOutputClassifier.partial_fit must match fitting one clone of the
    # base SGDClassifier per output column, after each of two partial_fit
    # rounds; prediction shapes must be (n_samples, n_outputs) throughout.

    sgd_linear_clf = SGDClassifier(loss='log', random_state=1)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)

    half_index = X.shape[0] // 2

    # first round: classes must be supplied on the first partial_fit call
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)
    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)

    # second round on the remaining half
    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)

    # per-column references: a fresh clone trained on each target column
    # must reproduce the multi-output predictions after each round
    for col in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, col], classes=classes[col])
        assert_array_equal(sgd_linear_clf.predict(X),
                           first_predictions[:, col])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, col])
        assert_array_equal(sgd_linear_clf.predict(X),
                           second_predictions[:, col])
193
+
194
+
195
def test_mutli_output_classifiation_partial_fit_no_first_classes_exception():
    # NOTE(review): "mutli"/"classifiation" typos are kept so the public test
    # identifier is unchanged.
    # Omitting `classes` on the very first partial_fit call must raise.
    base_clf = SGDClassifier(loss='log', random_state=1)
    multi_target_linear = MultiOutputClassifier(base_clf)
    assert_raises_regex(ValueError, "classes must be passed on the first call "
                        "to partial_fit.",
                        multi_target_linear.partial_fit, X, y)
105
201
106
202
107
203
def test_multi_output_classification ():
@@ -209,6 +305,25 @@ def test_multi_output_classification_sample_weights():
209
305
assert_almost_equal (clf .predict (X_test ), clf_w .predict (X_test ))
210
306
211
307
308
def test_multi_output_classification_partial_fit_sample_weights():
    # A sample weight of 2 must be equivalent to duplicating that sample.
    # NOTE(review): despite the name, this test exercises fit(), not
    # partial_fit() — confirm whether partial_fit coverage was intended.

    # weighted classifier
    Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    yw = [[3, 2], [2, 3], [3, 2]]
    weights = np.asarray([2., 1., 1.])
    clf_w = MultiOutputClassifier(SGDClassifier(random_state=1))
    clf_w.fit(Xw, yw, weights)

    # unweighted, but with repeated samples
    X_rep = [[1, 2, 3], [1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y_rep = [[3, 2], [3, 2], [2, 3], [3, 2]]
    clf = MultiOutputClassifier(SGDClassifier(random_state=1))
    clf.fit(X_rep, y_rep)

    X_test = [[1.5, 2.5, 3.5]]
    assert_array_almost_equal(clf.predict(X_test), clf_w.predict(X_test))
325
+
326
+
212
327
def test_multi_output_exceptions ():
213
328
# NotFittedError when fit is not done but score, predict and
214
329
# and predict_proba are called
0 commit comments