ENH replace Cython loss functions in SGD part 2 (#28029) · MarcBresson/scikit-learn@f8f16d4


Commit f8f16d4

lorentzenchr authored and OmarManzoor committed
ENH replace Cython loss functions in SGD part 2 (scikit-learn#28029)
Co-authored-by: Omar Salman <omar.salman@arbisoft.com>
1 parent 8287538 commit f8f16d4

File tree

10 files changed: +72 −270 lines


sklearn/linear_model/__init__.py

Lines changed: 0 additions & 6 deletions
@@ -44,7 +44,6 @@
 from ._quantile import QuantileRegressor
 from ._ransac import RANSACRegressor
 from ._ridge import Ridge, RidgeClassifier, RidgeClassifierCV, RidgeCV, ridge_regression
-from ._sgd_fast import Hinge, Huber, Log, ModifiedHuber, SquaredLoss
 from ._stochastic_gradient import SGDClassifier, SGDOneClassSVM, SGDRegressor
 from ._theil_sen import TheilSenRegressor

@@ -53,8 +52,6 @@
     "BayesianRidge",
     "ElasticNet",
     "ElasticNetCV",
-    "Hinge",
-    "Huber",
     "HuberRegressor",
     "Lars",
     "LarsCV",
@@ -64,10 +61,8 @@
     "LassoLarsCV",
     "LassoLarsIC",
     "LinearRegression",
-    "Log",
     "LogisticRegression",
     "LogisticRegressionCV",
-    "ModifiedHuber",
     "MultiTaskElasticNet",
     "MultiTaskElasticNetCV",
     "MultiTaskLasso",
@@ -85,7 +80,6 @@
     "SGDClassifier",
     "SGDRegressor",
     "SGDOneClassSVM",
-    "SquaredLoss",
     "TheilSenRegressor",
     "enet_path",
     "lars_path",
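For downstream code, the visible effect of this hunk is that the old SGD loss classes can no longer be imported from sklearn.linear_model. A minimal before/after sketch, assuming a build that includes this commit; note that sklearn._loss is private API and may change without deprecation:

import numpy as np

# Before this commit:
#     from sklearn.linear_model import Log, SquaredLoss  # now raises ImportError
# After it, the equivalent kernels live in the common loss module:
from sklearn._loss.loss import HalfBinomialLoss, HalfSquaredError

loss = HalfBinomialLoss()
y_true = np.array([0.0, 1.0, 1.0])
raw_prediction = np.array([-0.5, 0.2, 2.0])
print(loss.loss(y_true=y_true, raw_prediction=raw_prediction))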

sklearn/linear_model/_logistic.py

Lines changed: 4 additions & 4 deletions
@@ -313,14 +313,14 @@ def _logistic_regression_path(
         w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)
         mask = y == pos_class
         y_bin = np.ones(y.shape, dtype=X.dtype)
-        if solver in ["lbfgs", "newton-cg", "newton-cholesky"]:
+        if solver == "liblinear":
+            mask_classes = np.array([-1, 1])
+            y_bin[~mask] = -1.0
+        else:
             # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead
             # of in [-1, 1].
             mask_classes = np.array([0, 1])
             y_bin[~mask] = 0.0
-        else:
-            mask_classes = np.array([-1, 1])
-            y_bin[~mask] = -1.0

         # for compute_class_weight
         if class_weight == "balanced":
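The rewrite flips the branch so that liblinear is the special case: it keeps the historical {-1, 1} target encoding, while every other solver now goes through HalfBinomialLoss, which represents y in [0, 1]. A standalone sketch of the two encodings (the variable names mirror the function, but the snippet is illustrative, not the actual code path):

import numpy as np

y = np.array(["ham", "spam", "spam", "ham"])
pos_class = "spam"
mask = y == pos_class

# liblinear keeps the historical {-1, 1} encoding
y_bin = np.ones(y.shape, dtype=np.float64)
y_bin[~mask] = -1.0
print(y_bin)  # [-1.  1.  1. -1.]

# all other solvers feed HalfBinomialLoss, which expects {0, 1}
y_bin = np.ones(y.shape, dtype=np.float64)
y_bin[~mask] = 0.0
print(y_bin)  # [0. 1. 1. 0.]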

sklearn/linear_model/_sag.py

Lines changed: 1 addition & 1 deletion
@@ -127,7 +127,7 @@ def sag_solver(

     y : ndarray of shape (n_samples,)
         Target values. With loss='multinomial', y must be label encoded
-        (see preprocessing.LabelEncoder).
+        (see preprocessing.LabelEncoder). For loss='log' it must be in [0, 1].

     sample_weight : array-like of shape (n_samples,), default=None
         Weights applied to individual samples (1. for unweighted).
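The docstring change tracks the switch from Log to CyHalfBinomialLoss in the SAG solver: binary targets must now arrive in [0, 1] rather than {-1, 1}. A small sketch of the encoding step the docstring points to, using the LabelEncoder it references:

import numpy as np
from sklearn.preprocessing import LabelEncoder

y_raw = np.array(["no", "yes", "yes", "no"])

# LabelEncoder maps the two classes to 0 and 1, which is exactly
# the encoding loss='log' now expects.
y = LabelEncoder().fit_transform(y_raw).astype(np.float64)
print(y)  # [0. 1. 1. 0.]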

sklearn/linear_model/_sag_fast.pyx.tp

Lines changed: 12 additions & 15 deletions
@@ -29,14 +29,11 @@ dtypes = [('64', 'double', 'np.float64'),
 import numpy as np
 from libc.math cimport exp, fabs, isfinite, log
 from libc.time cimport time, time_t
+from libc.stdio cimport printf

-from ._sgd_fast cimport LossFunction
-from ._sgd_fast cimport Log, SquaredLoss
-
+from .._loss._loss cimport CyLossFunction, CyHalfSquaredError, CyHalfBinomialLoss
 from ..utils._seq_dataset cimport SequentialDataset32, SequentialDataset64

-from libc.stdio cimport printf
-

 {{for name_suffix, c_type, np_type in dtypes}}

@@ -77,7 +74,7 @@ cdef {{c_type}} _logsumexp{{name_suffix}}({{c_type}}* arr, int n_classes) noexce
 {{for name_suffix, c_type, np_type in dtypes}}

 cdef class MultinomialLogLoss{{name_suffix}}:
-    cdef {{c_type}} _loss(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
+    cdef {{c_type}} cy_loss(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
                           {{c_type}} sample_weight) noexcept nogil:
         r"""Multinomial Logistic regression loss.

@@ -121,7 +118,7 @@ cdef class MultinomialLogLoss{{name_suffix}}:
         loss = (logsumexp_prediction - prediction[int(y)]) * sample_weight
         return loss

-    cdef void dloss(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
+    cdef void cy_gradient(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
                     {{c_type}} sample_weight, {{c_type}}* gradient_ptr) noexcept nogil:
         r"""Multinomial Logistic regression gradient of the loss.

@@ -331,7 +328,7 @@ def sag{{name_suffix}}(
     cdef bint prox = beta > 0 and saga

     # Loss function to optimize
-    cdef LossFunction loss
+    cdef CyLossFunction loss
     # Whether the loss function is multinomial
     cdef bint multinomial = False
     # Multinomial loss function
@@ -341,9 +338,9 @@ def sag{{name_suffix}}(
         multinomial = True
         multiloss = MultinomialLogLoss{{name_suffix}}()
     elif loss_function == "log":
-        loss = Log()
+        loss = CyHalfBinomialLoss()
     elif loss_function == "squared":
-        loss = SquaredLoss()
+        loss = CyHalfSquaredError()
     else:
         raise ValueError("Invalid loss parameter: got %s instead of "
                          "one of ('log', 'squared', 'multinomial')"
@@ -406,9 +403,9 @@ def sag{{name_suffix}}(

                 # compute the gradient for this sample, given the prediction
                 if multinomial:
-                    multiloss.dloss(y, &prediction[0], n_classes, sample_weight, &gradient[0])
+                    multiloss.cy_gradient(y, &prediction[0], n_classes, sample_weight, &gradient[0])
                 else:
-                    gradient[0] = loss.dloss(y, prediction[0]) * sample_weight
+                    gradient[0] = loss.cy_gradient(y, prediction[0]) * sample_weight

                 # L2 regularization by simply rescaling the weights
                 wscale *= wscale_update
@@ -539,7 +536,7 @@ def sag{{name_suffix}}(
                        (n_iter + 1, end_time - start_time))
                 break
             elif verbose:
-                printf('Epoch %d, change: %.8f\n', n_iter + 1,
+                printf('Epoch %d, change: %.8g\n', n_iter + 1,
                        max_change / max_weight)
             n_iter += 1
     # We do the error treatment here based on error code in status to avoid
@@ -827,10 +824,10 @@ def _multinomial_grad_loss_all_samples(
             )

             # compute the gradient for this sample, given the prediction
-            multiloss.dloss(y, &prediction[0], n_classes, sample_weight, &gradient[0])
+            multiloss.cy_gradient(y, &prediction[0], n_classes, sample_weight, &gradient[0])

             # compute the loss for this sample, given the prediction
-            sum_loss += multiloss._loss(y, &prediction[0], n_classes, sample_weight)
+            sum_loss += multiloss.cy_loss(y, &prediction[0], n_classes, sample_weight)

             # update the sum of the gradient
             for j in range(xnnz):
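The renames _loss → cy_loss and dloss → cy_gradient align MultinomialLogLoss with the method names of CyLossFunction in sklearn/_loss/_loss.pyx, so the multinomial special case and the shared scalar losses use one vocabulary. The Cython kernels themselves are not importable from Python, but their wrappers in the private sklearn._loss.loss module expose the same loss/gradient pair; a hedged sketch (private API, may change):

import numpy as np
from sklearn._loss.loss import HalfBinomialLoss, HalfSquaredError

y_true = np.array([0.0, 1.0])
raw_prediction = np.array([0.3, -0.1])

for loss in (HalfBinomialLoss(), HalfSquaredError()):
    # loss() and gradient() loop the point-wise Cython kernels
    # (cy_loss / cy_gradient) over the input arrays.
    print(
        type(loss).__name__,
        loss.loss(y_true=y_true, raw_prediction=raw_prediction),
        loss.gradient(y_true=y_true, raw_prediction=raw_prediction),
    )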

sklearn/linear_model/_sgd_fast.pxd

Lines changed: 0 additions & 27 deletions
This file was deleted.
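With the .pxd gone, nothing cimports loss classes from _sgd_fast anymore; the shared kernels in sklearn/_loss are the single definition. The swap is loss-preserving: the removed Log on y ∈ {-1, 1} and HalfBinomialLoss on y ∈ {0, 1} compute identical values once the targets are re-encoded. A sanity-check sketch using the private Python wrappers:

import numpy as np
from sklearn._loss.loss import HalfBinomialLoss

p = np.array([-1.5, 0.0, 2.0])   # raw predictions
y01 = np.array([0.0, 1.0, 1.0])  # {0, 1} encoding for HalfBinomialLoss
ypm = 2.0 * y01 - 1.0            # old {-1, 1} encoding for Log

# formula of the removed Log loss: log(1 + exp(-y * p))
old_log = np.log1p(np.exp(-ypm * p))
new_loss = HalfBinomialLoss().loss(y_true=y01, raw_prediction=p)

np.testing.assert_allclose(old_log, new_loss)
print("Log on {-1, 1} targets == HalfBinomialLoss on {0, 1} targets")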
