go back to counting non-improvement iterations · scikit-learn/scikit-learn@6a33c2d · GitHub

Commit 6a33c2d

go back to counting non-improvement iterations
1 parent 35ea454 commit 6a33c2d
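
The diff below replaces the previous stopping rule, which compared the last entry of the loss curve against a window of the five preceding values (the sign / losses[-6:-1] test being removed), with an explicit _no_improvement_count: increment whenever an epoch fails to beat the best value seen so far by at least tol, reset otherwise. A minimal standalone sketch of that counting rule for the loss branch (the function name and the patience argument are illustrative, not scikit-learn API; the comparison here runs against the best value from before the current epoch so that a genuine new best resets the counter):

import numpy as np

def epochs_until_stop(losses, tol=1e-4, patience=2):
    """Illustrative version of the counter-based rule this commit adopts."""
    best_loss = np.inf             # sentinel: the first epoch always improves
    no_improvement_count = 0
    for epoch, loss in enumerate(losses):
        # Compare against the best seen before this epoch, so a genuine
        # new best resets the counter rather than incrementing it.
        if loss > best_loss - tol:
            no_improvement_count += 1
        else:
            no_improvement_count = 0
        if loss < best_loss:
            best_loss = loss
        if no_improvement_count > patience:
            return epoch           # too many stagnant epochs in a row: stop
    return len(losses) - 1

For example, with losses = [1.0, 0.5, 0.499, 0.498, 0.497] and tol=0.01, the counter reaches 3 on the last epoch and the function returns 4.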

File tree

2 files changed: +37 -12 lines changed

  sklearn/neural_network/multilayer_perceptron.py
  sklearn/neural_network/tests/test_mlp.py

sklearn/neural_network/multilayer_perceptron.py

Lines changed: 34 additions & 9 deletions
@@ -273,8 +273,12 @@ def _initialize(self, y, layer_units):
                                     self.intercepts_]
         self._coef_velocity = [np.zeros_like(coefs) for coefs in
                                self.coefs_]
+        self._no_improvement_count = 0
         if self.early_stopping:
             self.validation_scores_ = []
+            self.best_validation_score_ = -np.inf
+        else:
+            self.best_loss_ = np.inf
 
     def _init_coef(self, fan_in, fan_out, rng):
         if self.activation == 'logistic':
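
The plus/minus infinity sentinels added to _initialize guarantee that the very first epoch registers as an improvement in whichever branch is active; a short plain-Python illustration (not sklearn internals):

import numpy as np

best_loss = np.inf                 # any finite first loss is a new best
best_validation_score = -np.inf    # any finite first score is a new best
assert 1.7 < best_loss and 0.93 > best_validation_score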
@@ -424,7 +428,9 @@ def _fit_sgd(self, X, y, activations, deltas, coef_grads, intercept_grads,
         # early_stopping in partial_fit doesn't make sense
         early_stopping = self.early_stopping and not incremental
         if early_stopping:
-            X, X_val, y, y_val = train_test_split(X, y, random_state=self.random_state)
+            X, X_val, y, y_val = train_test_split(X, y,
+                                                  random_state=self.random_state,
+                                                  test_size=.1)
             y_val = self.label_binarizer_.inverse_transform(y_val)
 
         n_samples = X.shape[0]
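
The split now passes test_size=.1 explicitly, holding out 10% of the training data for validation instead of relying on train_test_split's default of 25%. A quick demonstration of the call shape (at the time of this commit the function lived in sklearn.cross_validation; in current releases it is imported from sklearn.model_selection):

import numpy as np
from sklearn.model_selection import train_test_split

X = np.arange(200).reshape(100, 2)
y = np.arange(100) % 2

# Fixed seed plus a 10% holdout, mirroring the call in the diff.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=0, test_size=.1)
print(X_train.shape, X_val.shape)  # (90, 2) (10, 2)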
@@ -476,30 +482,44 @@ def _fit_sgd(self, X, y, activations, deltas, coef_grads, intercept_grads,
                 if self.learning_rate == 'invscaling':
                     self.learning_rate_ = (self.learning_rate_init /
                                            (self.t_ + 1) ** self.power_t)
-                # stopping criteria
+                # validation set evaluation
                 if early_stopping:
                     # compute validation score, use that for stopping
                     self.validation_scores_.append(self.score(X_val, y_val))
 
                     if self.verbose:
                         print("Validation score: %f" % (self.validation_scores_[-1]))
+                    # update best parameters
                     # use validation_scores_, not loss_curve_
                     # let's hope no-one overloads .score with mse
-                    sign = -1
-                    losses = self.validation_scores_
+                    if self.validation_scores_[-1] > self.best_validation_score_:
+                        self.best_validation_score_ = self.validation_scores_[-1]
+                        self._best_coefs = [c for c in self.coefs_]
+                        self._best_intercepts = [i for i in self.intercepts_]
+
+                    if self.validation_scores_[-1] < self.best_validation_score_ + self.tol:
+                        self._no_improvement_count += 1
+                    else:
+                        self._no_improvement_count = 0
+
                 else:
-                    sign = 1
-                    losses = self.loss_curve_
+                    if self.loss_curve_[-1] < self.best_loss_:
+                        self.best_loss_ = self.loss_curve_[-1]
+                    if self.loss_curve_[-1] > self.best_loss_ - self.tol:
+                        self._no_improvement_count += 1
+                    else:
+                        self._no_improvement_count = 0
 
-                if len(losses) > 3 and np.all(sign * np.array(losses[-6:-1])
-                                              < sign * losses[-1] + self.tol):
+                # stopping criteria
+                if self._no_improvement_count > 2:
                     # not better than last two iterations by tol.
                     # stop or decreate learning rate
-                    msg = ("Training loss did not improve more than tol for five"
+                    msg = ("Training loss did not improve more than tol for two"
                            " consecutive epochs.")
                     if self.learning_rate == 'adaptive':
                         if self.learning_rate_ > 1e-6:
                             self.learning_rate_ /= 5
+                            self._no_improvement_count = 0
                             if self.verbose:
                                 print(msg + " Setting learning rate to %f" % self.learning_rate_)
                     else:
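
Condensed, the early-stopping branch above does three things per epoch: snapshot the coefficients and intercepts whenever the validation score sets a new best, count epochs whose score is not at least tol above that best, and, when learning_rate='adaptive', divide the learning rate by 5 and reset the counter instead of stopping outright. A standalone sketch of that bookkeeping (class and method names are illustrative, not scikit-learn API; the stagnation check here runs before the best score is updated):

import numpy as np

class ValidationStopper:
    """Illustrative bookkeeping for score-based early stopping."""

    def __init__(self, tol, patience=2):
        self.tol = tol
        self.patience = patience
        self.best_score = -np.inf
        self.no_improvement_count = 0
        self.best_weights = None

    def update(self, score, weights):
        # Count stagnation against the best score seen so far ...
        if score < self.best_score + self.tol:
            self.no_improvement_count += 1
        else:
            self.no_improvement_count = 0
        # ... then record a new best and snapshot the weights.
        if score > self.best_score:
            self.best_score = score
            self.best_weights = [w.copy() for w in weights]

    def should_stop(self):
        return self.no_improvement_count > self.patience

    def shrink_or_stop(self, learning_rate, adaptive):
        # Adaptive schedule: shrink the step size, reset the counter,
        # and keep training; otherwise the caller stops.
        if adaptive and learning_rate > 1e-6:
            self.no_improvement_count = 0
            return learning_rate / 5
        return None

After the training loop, the stored snapshot would be copied back into the live weights, which is exactly what the final hunk below does with _best_coefs and _best_intercepts.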
@@ -522,6 +542,11 @@ def _fit_sgd(self, X, y, activations, deltas, coef_grads, intercept_grads,
         except KeyboardInterrupt:
             pass
 
+        if early_stopping:
+            # restore best weights
+            self.coefs_ = self._best_coefs
+            self.intercepts_ = self._best_intercepts
+
     def fit(self, X, y):
         """Fit the model to the data X and target y.
 
sklearn/neural_network/tests/test_mlp.py

Lines changed: 3 additions & 3 deletions
@@ -102,9 +102,9 @@ def test_fit():
 
     mlp.out_activation_ = 'logistic'
     mlp.t_ = 0
-    mlp._best_loss = np.inf
+    mlp.best_loss_ = np.inf
     mlp.loss_curve_ = []
-    mlp._loss_increase_count = 0
+    mlp._no_improvement_count = 0
     mlp._intercept_velocity = [np.zeros_like(intercepts) for
                                intercepts in
                                mlp.intercepts_]
@@ -450,7 +450,7 @@ def test_tolerance():
     # It should force the algorithm to exit the loop when it converges.
     X = [[3, 2], [1, 6]]
     y = [1, 0]
-    clf = MLPClassifier(tol=0.5, max_iter=3000, algorithm='sgd')
+    clf = MLPClassifier(tol=0.5, max_iter=3000, algorithm='sgd', verbose=10)
     clf.fit(X, y)
     assert_greater(clf.max_iter, clf.n_iter_)
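The adjusted test exercises precisely this path: with tol=0.5 the no-improvement counter saturates almost immediately, so fit returns long before max_iter and n_iter_ records the early exit. A usage sketch under the commit-era constructor signature (algorithm='sgd' was later renamed to solver='sgd'; adjust accordingly on modern releases):

from sklearn.neural_network import MLPClassifier

X = [[3, 2], [1, 6]]
y = [1, 0]

# A very loose tolerance makes nearly every epoch count as "no improvement",
# so training stops well before the iteration cap.
clf = MLPClassifier(tol=0.5, max_iter=3000, algorithm='sgd', verbose=10)
clf.fit(X, y)
assert clf.n_iter_ < clf.max_iter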
