scikit-learn
diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
Lines changed: 22 additions & 12 deletions
diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py
Lines changed: 72 additions & 0 deletions
diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
Lines changed: 36 additions & 15 deletions
@@ -16,6 +16,7 @@
 from scipy import optimize, sparse
 
 from .base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator
+from .sag import sag_logistic
 from ..feature_selection.from_model import _LearntSelectorMixin
 from ..preprocessing import LabelEncoder, LabelBinarizer
 from ..svm.base import _fit_liblinear
@@ -395,7 +396,8 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                              max_iter=100, tol=1e-4, verbose=0,
                              solver='lbfgs', coef=None, copy=True,
                              class_weight=None, dual=False, penalty='l2',
-                             intercept_scaling=1., multi_class='ovr'):
+                             intercept_scaling=1., multi_class='ovr',
+                             random_state=None):
     """Compute a Logistic Regression model for a list of regularization
     parameters.
 
@@ -481,7 +483,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         chosen is 'ovr', then a binary problem is fit for each label. Else
         the loss minimised is the multinomial loss fit across
         the entire probability distribution. Works only for the 'lbfgs'
-        solver.
+        and 'newton-cg' solvers.
+
+    random_state : int seed, RandomState instance, or None (default)
+        The seed of the pseudo random number generator to use when
+        shuffling the data.
 
     Returns
     -------
@@ -505,20 +511,20 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         raise ValueError("multi_class can be either 'multinomial' or 'ovr'"
                          "got %s" % multi_class)
 
-    if solver not in ['liblinear', 'newton-cg', 'lbfgs']:
+    if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag']:
         raise ValueError("Logistic Regression supports only liblinear,"
-                         " newton-cg and lbfgs solvers. got %s" % solver)
+                         " newton-cg, lbfgs, and sag solvers. got %s" % solver)
 
     if multi_class == 'multinomial' and solver == 'liblinear':
         raise ValueError("Solver %s cannot solve problems with "
                          "a multinomial backend." % solver)
 
     if solver != 'liblinear':
         if penalty != 'l2':
-            raise ValueError("newton-cg and lbfgs solvers support only "
+            raise ValueError("newton-cg, lbfgs and sag solvers support only "
                              "l2 penalties, got %s penalty." % penalty)
         if dual:
-            raise ValueError("newton-cg and lbfgs solvers support only "
+            raise ValueError("newton-cg, lbfgs and sag solvers support only "
                              "dual=False, got dual=%s" % dual)
     # Preprocessing.
     X = check_array(X, accept_sparse='csr', dtype=np.float64)
@@ -660,17 +666,21 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                 w0 = np.concatenate([coef_.ravel(), intercept_])
             else:
                 w0 = coef_.ravel()
+        elif solver == 'sag':
+            w0 = sag_logistic(X, y, w0, 1. / C, sample_weight,
+                              max_iter=max_iter, tol=tol, verbose=verbose,
+                              random_state=random_state)
         else:
             raise ValueError("solver must be one of {'liblinear', 'lbfgs', "
-                             "'newton-cg'}, got '%s' instead" % solver)
+                             "'newton-cg', 'sag'}, got '%s' instead" % solver)
 
         if multi_class == 'multinomial':
             multi_w0 = np.reshape(w0, (classes.size, -1))
             if classes.size == 2:
                 multi_w0 = multi_w0[1][np.newaxis, :]
             coefs.append(multi_w0)
         else:
-            coefs.append(w0)
+            coefs.append(np.copy(w0))
     return coefs, np.array(Cs)
 
 
@@ -1015,10 +1025,10 @@ def fit(self, X, y):
 
         X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C")
         self.classes_ = np.unique(y)
-        if self.solver not in ['liblinear', 'newton-cg', 'lbfgs']:
+        if self.solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag']:
             raise ValueError(
-                "Logistic Regression supports only liblinear, newton-cg and "
-                "lbfgs solvers, Got solver=%s" % self.solver
+                "Logistic Regression supports only liblinear, newton-cg, "
+                "lbfgs and sag solvers, Got solver=%s" % self.solver
                 )
 
         if self.solver == 'liblinear' and self.multi_class == 'multinomial':
@@ -1060,7 +1070,7 @@ def fit(self, X, y):
                 fit_intercept=self.fit_intercept, tol=self.tol,
                 verbose=self.verbose, solver=self.solver,
                 multi_class=self.multi_class, max_iter=self.max_iter,
-                class_weight=self.class_weight)
+                class_weight=self.class_weight, random_state=self.random_state)
             self.coef_.append(coef_[0])
 
         self.coef_ = np.squeeze(self.coef_)
 
@@ -21,6 +21,78 @@
 SPARSE_INTERCEPT_DECAY = 0.01
 
 
+def sag_logistic(X, y, coef_init, alpha=1e-4, sample_weight=None,
+                 max_iter=1000, tol=0.001, verbose=0, random_state=None):
+    """SAG solver for LogisticRegression"""
+    # Returns the fitted coefficients, with the intercept appended if fitted.
+    n_samples, n_features = X.shape[0], X.shape[1]
+
+    alpha = alpha / n_samples  # rescale alpha by the number of samples
+
+    # default to unit sample weights when none are given
+    if sample_weight is None:
+        sample_weight = np.ones(n_samples, dtype=np.float64, order='C')
+
+    # coef_init may carry the intercept as its last element.
+    fit_intercept = coef_init.size == (n_features + 1)
+    if fit_intercept:
+        intercept_init = coef_init[-1]
+        coef_init = coef_init[:-1]
+    else:
+        intercept_init = 0.0
+
+    # TODO: *_init (with a boolean warm-start) as parameters ?
+    intercept_sum_gradient_init = 0.0
+    sum_gradient_init = np.zeros(n_features, dtype=np.float64, order='C')
+    gradient_memory_init = np.zeros(n_samples, dtype=np.float64, order='C')
+    seen_init = np.zeros(n_samples, dtype=np.int32, order='C')
+    num_seen_init = 0
+    weight_pos = 1
+    weight_neg = 1
+
+    random_state = check_random_state(random_state)
+
+    # pick the dataset wrapper matching X (sparse CSR vs. dense array)
+    if sp.issparse(X):
+        dataset = CSRDataset(X.data, X.indptr, X.indices,
+                             y, sample_weight,
+                             seed=random_state.randint(MAX_INT))
+        intercept_decay = SPARSE_INTERCEPT_DECAY
+    else:
+        dataset = ArrayDataset(X, y, sample_weight,
+                               seed=random_state.randint(MAX_INT))
+        intercept_decay = 1.0
+
+    # set eta0 to 1 / 4L, where L is the max sum of
+    # squares over all samples
+    step_size = get_auto_eta(dataset, alpha, n_samples, Log(), fit_intercept)
+
+    intercept_, num_seen, max_iter_reached, intercept_sum_gradient = \
+        sag_sparse(dataset, coef_init.ravel(),  # presumably updated in place
+                   intercept_init, n_samples,
+                   n_features, tol,
+                   max_iter,
+                   Log(),
+                   step_size, alpha,
+                   sum_gradient_init.ravel(),
+                   gradient_memory_init.ravel(),
+                   seen_init.ravel(),
+                   num_seen_init,
+                   weight_pos, weight_neg,
+                   fit_intercept,
+                   intercept_sum_gradient_init,
+                   intercept_decay,
+                   verbose)
+
+    if max_iter_reached:
+        warnings.warn("The max_iter was reached which means "
+                      "the coef_ did not converge", ConvergenceWarning)
+    if fit_intercept:
+        return np.append(coef_init, intercept_)  # intercept goes last
+    else:
+        return coef_init  # NOTE(review): same buffer as the caller's array
+
+
 # taken from http://stackoverflow.com/questions/1816958
 # useful for passing instance methods to Parallel
 def multiprocess_method(instance, name, args=()):
 
@@ -105,7 +105,9 @@ def test_predict_iris():
                 LogisticRegression(C=len(iris.data), solver='lbfgs',
                                    multi_class='multinomial'),
                 LogisticRegression(C=len(iris.data), solver='newton-cg',
-                                   multi_class='multinomial')]:
+                                   multi_class='multinomial'),
+                LogisticRegression(C=len(iris.data), solver='sag',
+                                   multi_class='ovr')]:
         clf.fit(iris.data, target)
         assert_array_equal(np.unique(target), clf.classes_)
 
@@ -216,17 +218,17 @@ def test_consistency_path():
     f = ignore_warnings
     # can't test with fit_intercept=True since LIBLINEAR
     # penalizes the intercept
-    for method in ('lbfgs', 'newton-cg', 'liblinear'):
+    for method in ('lbfgs', 'newton-cg', 'liblinear', 'sag'):
         coefs, Cs = f(logistic_regression_path)(
-            X, y, Cs=Cs, fit_intercept=False, tol=1e-16, solver=method)
+            X, y, Cs=Cs, fit_intercept=False, tol=1e-5, solver=method)
         for i, C in enumerate(Cs):
-            lr = LogisticRegression(C=C, fit_intercept=False, tol=1e-16)
+            lr = LogisticRegression(C=C, fit_intercept=False, tol=1e-5)
             lr.fit(X, y)
             lr_coef = lr.coef_.ravel()
             assert_array_almost_equal(lr_coef, coefs[i], decimal=4)
 
     # test for fit_intercept=True
-    for method in ('lbfgs', 'newton-cg', 'liblinear'):
+    for method in ('lbfgs', 'newton-cg', 'liblinear', 'sag'):
         Cs = [1e3]
         coefs, Cs = f(logistic_regression_path)(
             X, y, Cs=Cs, fit_intercept=True, tol=1e-4, solver=method)
@@ -450,29 +452,43 @@ def test_ovr_multinomial_iris():
 
 def test_logistic_regression_solvers():
     X, y = make_classification(n_features=10, n_informative=5, random_state=0)
-    clf_n = LogisticRegression(solver='newton-cg', fit_intercept=False)
-    clf_n.fit(X, y)
+    clf_new = LogisticRegression(solver='newton-cg', fit_intercept=False)
+    clf_new.fit(X, y)
     clf_lbf = LogisticRegression(solver='lbfgs', fit_intercept=False)
     clf_lbf.fit(X, y)
+    clf_sag = LogisticRegression(solver='sag', fit_intercept=False)
+    clf_sag.fit(X, y)
     clf_lib = LogisticRegression(fit_intercept=False)
     clf_lib.fit(X, y)
-    assert_array_almost_equal(clf_n.coef_, clf_lib.coef_, decimal=3)
+    assert_array_almost_equal(clf_new.coef_, clf_lib.coef_, decimal=3)
     assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=3)
-    assert_array_almost_equal(clf_n.coef_, clf_lbf.coef_, decimal=3)
+    assert_array_almost_equal(clf_new.coef_, clf_lbf.coef_, decimal=3)
+    assert_array_almost_equal(clf_sag.coef_, clf_lib.coef_, decimal=3)
+    assert_array_almost_equal(clf_sag.coef_, clf_new.coef_, decimal=3)
+    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=3)
 
 
 def test_logistic_regression_solvers_multiclass():
+    tol = 1e-6
     X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                                n_classes=3, random_state=0)
-    clf_n = LogisticRegression(solver='newton-cg', fit_intercept=False)
-    clf_n.fit(X, y)
-    clf_lbf = LogisticRegression(solver='lbfgs', fit_intercept=False)
+    clf_new = LogisticRegression(solver='newton-cg', fit_intercept=False,
+                                 tol=tol)
+    clf_new.fit(X, y)
+    clf_lbf = LogisticRegression(solver='lbfgs', fit_intercept=False,
+                                 tol=tol)
     clf_lbf.fit(X, y)
-    clf_lib = LogisticRegression(fit_intercept=False)
+    clf_sag = LogisticRegression(solver='sag', fit_intercept=False,
+                                 tol=tol, max_iter=1000)
+    clf_sag.fit(X, y)
+    clf_lib = LogisticRegression(fit_intercept=False, tol=tol)
     clf_lib.fit(X, y)
-    assert_array_almost_equal(clf_n.coef_, clf_lib.coef_, decimal=4)
+    assert_array_almost_equal(clf_new.coef_, clf_lib.coef_, decimal=4)
     assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
-    assert_array_almost_equal(clf_n.coef_, clf_lbf.coef_, decimal=4)
+    assert_array_almost_equal(clf_new.coef_, clf_lbf.coef_, decimal=4)
+    assert_array_almost_equal(clf_sag.coef_, clf_lib.coef_, decimal=4)
+    assert_array_almost_equal(clf_sag.coef_, clf_new.coef_, decimal=4)
+    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
 
 
 def test_logistic_regressioncv_class_weights():
@@ -499,7 +515,12 @@ def test_logistic_regressioncv_class_weights():
     clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                    class_weight='auto')
     clf_lib.fit(X, y)
+    clf_sag = LogisticRegressionCV(solver='sag', fit_intercept=False,
+                                   class_weight='auto')
+    clf_sag.fit(X, y)
     assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
+    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
+    assert_array_almost_equal(clf_lib.coef_, clf_sag.coef_, decimal=4)
 
 
 def test_logistic_regression_convergence_warnings():