BUG refactor SGD classes to not store sample_weight · seckcoder/scikit-learn@5b00f44 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5b00f44

Browse files
committed
BUG refactor SGD classes to not store sample_weight
1 parent 9ae9844 commit 5b00f44

File tree

3 files changed

+24
-24
lines changed

3 files changed

+24
-24
lines changed

sklearn/linear_model/base.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -237,16 +237,16 @@ def _set_penalty_type(self, penalty):
237237
except KeyError:
238238
raise ValueError("Penalty %s is not supported. " % penalty)
239239

240-
def _set_sample_weight(self, sample_weight, n_samples):
240+
def _validate_sample_weight(self, sample_weight, n_samples):
241241
"""Set the sample weight array."""
242242
if sample_weight == None:
243243
sample_weight = np.ones(n_samples, dtype=np.float64, order='C')
244244
else:
245245
sample_weight = np.asarray(sample_weight, dtype=np.float64,
246246
order="C")
247-
self.sample_weight = sample_weight
248-
if self.sample_weight.shape[0] != n_samples:
247+
if sample_weight.shape[0] != n_samples:
249248
raise ValueError("Shapes of X and sample_weight do not match.")
249+
return sample_weight
250250

251251
def _set_coef(self, coef_):
252252
"""Make sure that coef_ is 2d. """
@@ -400,27 +400,27 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
400400

401401
# Allocate datastructures from input arguments
402402
self._set_class_weight(class_weight, self.classes, y)
403-
self._set_sample_weight(sample_weight, n_samples)
403+
sample_weight = self._validate_sample_weight(sample_weight, n_samples)
404404
self._allocate_parameter_mem(n_classes, n_features,
405405
coef_init, intercept_init)
406406

407407
# delegate to concrete training procedure
408408
if n_classes > 2:
409-
self._fit_multiclass(X, y)
409+
self._fit_multiclass(X, y, sample_weight)
410410
elif n_classes == 2:
411-
self._fit_binary(X, y)
411+
self._fit_binary(X, y, sample_weight)
412412
else:
413413
raise ValueError("The number of class labels must be "
414414
"greater than one.")
415415
# return self for chaining fit and predict calls
416416
return self
417417

418418
@abstractmethod
419-
def _fit_binary(self, X, y):
419+
def _fit_binary(self, X, y, sample_weight):
420420
"""Fit binary classifier."""
421421

422422
@abstractmethod
423-
def _fit_multiclass(self, X, y):
423+
def _fit_multiclass(self, X, y, sample_weight):
424424
"""Fit multiclass classifier."""
425425

426426
def decision_function(self, X):
@@ -547,15 +547,15 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
547547
n_samples, n_features = X.shape
548548

549549
# Allocate datastructures from input arguments
550-
self._set_sample_weight(sample_weight, n_samples)
550+
sample_weight = self._validate_sample_weight(sample_weight, n_samples)
551551
self._allocate_parameter_mem(1, n_features,
552552
coef_init, intercept_init)
553553

554-
self._fit_regressor(X, y)
554+
self._fit_regressor(X, y, sample_weight)
555555
return self
556556

557557
@abstractmethod
558-
def _fit_regressor(self, X, y):
558+
def _fit_regressor(self, X, y, sample_weight):
559559
"""Fit regression model."""
560560

561561
def predict(self, X):

sklearn/linear_model/sparse/stochastic_gradient.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ class SGDClassifier(BaseSGDClassifier):
137137
138138
"""
139139

140-
def _fit_binary(self, X, y):
140+
def _fit_binary(self, X, y, sample_weight):
141141
"""Fit a binary classifier."""
142142
X = _tocsr(X)
143143

@@ -165,15 +165,15 @@ def _fit_binary(self, X, y):
165165
int(self.seed),
166166
self._expanded_class_weight[1],
167167
self._expanded_class_weight[0],
168-
self.sample_weight,
168+
sample_weight,
169169
self.learning_rate_code,
170170
self.eta0, self.power_t)
171171

172172
# update self.coef_ and self.sparse_coef_ consistently
173173
self._set_coef(coef_)
174174
self.intercept_ = np.asarray(intercept_)
175175

176-
def _fit_multiclass(self, X, y):
176+
def _fit_multiclass(self, X, y, sample_weight):
177177
"""Fit a multi-class classifier as a combination of binary classifiers
178178
179179
Each binary classifier predicts one class versus all others
@@ -197,7 +197,7 @@ def _fit_multiclass(self, X, y):
197197
self.verbose, self.shuffle,
198198
self.seed,
199199
self._expanded_class_weight[i],
200-
self.sample_weight,
200+
sample_weight,
201201
self.learning_rate_code,
202202
self.eta0, self.power_t)
203203
for i, c in enumerate(self.classes))
@@ -330,8 +330,8 @@ class SGDRegressor(BaseSGDRegressor):
330330
331331
"""
332332

333-
def _fit_regressor(self, X, y):
334-
# interprete X as CSR matrix
333+
def _fit_regressor(self, X, y, sample_weight):
334+
# interpret X as CSR matrix
335335
X = _tocsr(X)
336336

337337
# get sparse matrix datastructures
@@ -352,7 +352,7 @@ def _fit_regressor(self, X, y):
352352
int(self.shuffle),
353353
int(self.seed),
354354
1.0, 1.0,
355-
self.sample_weight,
355+
sample_weight,
356356
self.learning_rate_code,
357357
self.eta0, self.power_t)
358358

sklearn/linear_model/stochastic_gradient.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class SGDClassifier(BaseSGDClassifier):
125125
126126
"""
127127

128-
def _fit_binary(self, X, y):
128+
def _fit_binary(self, X, y, sample_weight):
129129
"""Fit a single binary classifier"""
130130
# interprete X as dense array
131131
X = np.asarray(X, dtype=np.float64, order='C')
@@ -148,14 +148,14 @@ def _fit_binary(self, X, y):
148148
self.seed,
149149
self._expanded_class_weight[1],
150150
self._expanded_class_weight[0],
151-
self.sample_weight,
151+
sample_weight,
152152
self.learning_rate_code, self.eta0,
153153
self.power_t)
154154

155155
self._set_coef(coef_)
156156
self.intercept_ = np.asarray(intercept_)
157157

158-
def _fit_multiclass(self, X, y):
158+
def _fit_multiclass(self, X, y, sample_weight):
159159
"""Fit a multi-class classifier by combining binary classifiers
160160
161161
Each binary classifier predicts one class versus all others. This
@@ -174,7 +174,7 @@ def _fit_multiclass(self, X, y):
174174
self.verbose, self.shuffle,
175175
self.seed,
176176
self._expanded_class_weight[i],
177-
self.sample_weight,
177+
sample_weight,
178178
self.learning_rate_code,
179179
self.eta0, self.power_t)
180180
for i, c in enumerate(self.classes))
@@ -301,7 +301,7 @@ class SGDRegressor(BaseSGDRegressor):
301301
302302
"""
303303

304-
def _fit_regressor(self, X, y):
304+
def _fit_regressor(self, X, y, sample_weight):
305305
X = np.asarray(X, dtype=np.float64, order='C')
306306
coef_, intercept_ = plain_sgd(self.coef_,
307307
self.intercept_,
@@ -315,7 +315,7 @@ def _fit_regressor(self, X, y):
315315
int(self.shuffle),
316316
self.seed,
317317
1.0, 1.0,
318-
self.sample_weight,
318+
sample_weight,
319319
self.learning_rate_code,
320320
self.eta0, self.power_t)
321321

0 commit comments

Comments
 (0)
0