@@ -273,8 +273,12 @@ def _initialize(self, y, layer_units):
273
273
self .intercepts_ ]
274
274
self ._coef_velocity = [np .zeros_like (coefs ) for coefs in
275
275
self .coefs_ ]
276
+ self ._no_improvement_count = 0
276
277
if self .early_stopping :
277
278
self .validation_scores_ = []
279
+ self .best_validation_score_ = - np .inf
280
+ else :
281
+ self .best_loss_ = np .inf
278
282
279
283
def _init_coef (self , fan_in , fan_out , rng ):
280
284
if self .activation == 'logistic' :
@@ -424,7 +428,9 @@ def _fit_sgd(self, X, y, activations, deltas, coef_grads, intercept_grads,
424
428
# early_stopping in partial_fit doesn't make sense
425
429
early_stopping = self .early_stopping and not incremental
426
430
if early_stopping :
427
- X , X_val , y , y_val = train_test_split (X , y , random_state = self .random_state )
431
+ X , X_val , y , y_val = train_test_split (X , y ,
432
+ random_state = self .random_state ,
433
+ test_size = .1 )
428
434
y_val = self .label_binarizer_ .inverse_transform (y_val )
429
435
430
436
n_samples = X .shape [0 ]
@@ -476,30 +482,44 @@ def _fit_sgd(self, X, y, activations, deltas, coef_grads, intercept_grads,
476
482
if self .learning_rate == 'invscaling' :
477
483
self .learning_rate_ = (self .learning_rate_init /
478
484
(self .t_ + 1 ) ** self .power_t )
479
- # stopping criteria
485
+ # validation set evaluation
480
486
if early_stopping :
481
487
# compute validation score, use that for stopping
482
488
self .validation_scores_ .append (self .score (X_val , y_val ))
483
489
484
490
if self .verbose :
485
491
print ("Validation score: %f" % (self .validation_scores_ [- 1 ]))
492
+ # update best parameters
486
493
# use validation_scores_, not loss_curve_
487
494
# let's hope no-one overloads .score with mse
488
- sign = - 1
489
- losses = self .validation_scores_
495
+ if self .validation_scores_ [- 1 ] > self .best_validation_score_ :
496
+ self .best_validation_score_ = self .validation_scores_ [- 1 ]
497
+ self ._best_coefs = [c for c in self .coefs_ ]
498
+ self ._best_intercepts = [i for i in self .intercepts_ ]
499
+
500
+ if self .validation_scores_ [- 1 ] < self .best_validation_score_ + self .tol :
501
+ self ._no_improvement_count += 1
502
+ else :
503
+ self ._no_improvement_count = 0
504
+
490
505
else :
491
- sign = 1
492
- losses = self .loss_curve_
506
+ if self .loss_curve_ [- 1 ] < self .best_loss_ :
507
+ self .best_loss_ = self .loss_curve_ [- 1 ]
508
+ if self .loss_curve_ [- 1 ] > self .best_loss_ - self .tol :
509
+ self ._no_improvement_count += 1
510
+ else :
511
+ self ._no_improvement_count = 0
493
512
494
- if len ( losses ) > 3 and np . all ( sign * np . array ( losses [ - 6 : - 1 ])
495
- < sign * losses [ - 1 ] + self .tol ) :
513
+ # stopping criteria
514
+ if self ._no_improvement_count > 2 :
496
515
# not better than last two iterations by tol.
497
516
# stop or decrease learning rate
498
- msg = ("Training loss did not improve more than tol for five "
517
+ msg = ("Training loss did not improve more than tol for two "
499
518
" consecutive epochs." )
500
519
if self .learning_rate == 'adaptive' :
501
520
if self .learning_rate_ > 1e-6 :
502
521
self .learning_rate_ /= 5
522
+ self ._no_improvement_count = 0
503
523
if self .verbose :
504
524
print (msg + " Setting learning rate to %f" % self .learning_rate_ )
505
525
else :
@@ -522,6 +542,11 @@ def _fit_sgd(self, X, y, activations, deltas, coef_grads, intercept_grads,
522
542
except KeyboardInterrupt :
523
543
pass
524
544
545
+ if early_stopping :
546
+ # restore best weights
547
+ self .coefs_ = self ._best_coefs
548
+ self .intercepts_ = self ._best_intercepts
549
+
525
550
def fit (self , X , y ):
526
551
"""Fit the model to the data X and target y.
527
552
0 commit comments