Merge pull request #4261 from ragv/svm_scale_c_plot · ogrisel/scikit-learn@a926626 · GitHub


Commit a926626

committed
Merge pull request scikit-learn#4261 from ragv/svm_scale_c_plot
[MRG+1] Broken svm scale C example
2 parents 3f5277e + 6d222fd commit a926626

File tree

4 files changed (+188 −72 lines changed)

examples/svm/plot_svm_scale_c.py

Lines changed: 12 additions & 12 deletions
@@ -38,19 +38,19 @@
 
 The figures below are used to illustrate the effect of scaling our
 `C` to compensate for the change in the number of samples, in the
-case of using an `L1` penalty, as well as the `L2` penalty.
+case of using an `l1` penalty, as well as the `l2` penalty.
 
-L1-penalty case
+l1-penalty case
 -----------------
-In the `L1` case, theory says that prediction consistency
+In the `l1` case, theory says that prediction consistency
 (i.e. that under given hypothesis, the estimator
 learned predicts as well as a model knowing the true distribution)
-is not possible because of the bias of the `L1`. It does say, however,
+is not possible because of the bias of the `l1`. It does say, however,
 that model consistency, in terms of finding the right set of non-zero
 parameters as well as their signs, can be achieved by scaling
 `C1`.
 
-L2-penalty case
+l2-penalty case
 -----------------
 The theory says that in order to achieve prediction consistency, the
 penalty parameter should be kept constant
@@ -63,17 +63,17 @@
 corresponding cross-validation scores on the `y-axis`, for several different
 fractions of a generated data-set.
 
-In the `L1` penalty case, the cross-validation-error correlates best with
+In the `l1` penalty case, the cross-validation-error correlates best with
 the test-error, when scaling our `C` with the number of samples, `n`,
 which can be seen in the first figure.
 
-For the `L2` penalty case, the best result comes from the case where `C`
+For the `l2` penalty case, the best result comes from the case where `C`
 is not scaled.
 
 .. topic:: Note:
 
     Two separate datasets are used for the two different plots. The reason
-    behind this is the `L1` case works better on sparse data, while `L2`
+    behind this is the `l1` case works better on sparse data, while `l2`
     is better suited to the non-sparse case.
 """
 print(__doc__)
@@ -100,20 +100,20 @@
 n_samples = 100
 n_features = 300
 
-# L1 data (only 5 informative features)
+# l1 data (only 5 informative features)
 X_1, y_1 = datasets.make_classification(n_samples=n_samples,
                                         n_features=n_features, n_informative=5,
                                         random_state=1)
 
-# L2 data: non sparse, but less features
+# l2 data: non sparse, but less features
 y_2 = np.sign(.5 - rnd.rand(n_samples))
 X_2 = rnd.randn(n_samples, n_features / 5) + y_2[:, np.newaxis]
 X_2 += 5 * rnd.randn(n_samples, n_features / 5)
 
-clf_sets = [(LinearSVC(penalty='L1', loss='L2', dual=False,
+clf_sets = [(LinearSVC(penalty='l1', loss='squared_hinge', dual=False,
                        tol=1e-3),
              np.logspace(-2.3, -1.3, 10), X_1, y_1),
-            (LinearSVC(penalty='L2', loss='L2', dual=True,
+            (LinearSVC(penalty='l2', loss='squared_hinge', dual=True,
                        tol=1e-4),
              np.logspace(-4.5, -2, 10), X_2, y_2)]
 

sklearn/svm/base.py

Lines changed: 40 additions & 25 deletions
@@ -7,15 +7,14 @@
 
 from . import libsvm, liblinear
 from . import libsvm_sparse
-from ..base import BaseEstimator, ClassifierMixin, RegressorMixin
+from ..base import BaseEstimator, ClassifierMixin
 from ..preprocessing import LabelEncoder
 from ..utils import check_array, check_random_state, column_or_1d
 from ..utils import ConvergenceWarning, compute_class_weight
 from ..utils.extmath import safe_sparse_dot
 from ..utils.validation import check_is_fitted
 from ..externals import six
 
-
 LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr']
 
 
@@ -70,7 +69,7 @@ def __init__(self, impl, kernel, degree, gamma, coef0,
                  tol, C, nu, epsilon, shrinking, probability, cache_size,
                  class_weight, verbose, max_iter, random_state):
 
-        if not impl in LIBSVM_IMPL:  # pragma: no cover
+        if impl not in LIBSVM_IMPL:  # pragma: no cover
            raise ValueError("impl should be one of %s, %s was given" % (
                LIBSVM_IMPL, impl))
 
@@ -384,7 +383,7 @@ def decision_function(self, X):
 
     def _validate_for_predict(self, X):
         check_is_fitted(self, 'support_')
-
+
         X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")
         if self._sparse and not sp.isspmatrix(X):
             X = sp.csr_matrix(X)
@@ -604,63 +603,63 @@ def _get_liblinear_solver_type(multi_class, penalty, loss, dual):
     - loss
     - dual
 
-    The same number is internally by LibLinear to determine which
-    solver to use.
+    The same number is also internally used by LibLinear to determine
+    which solver to use.
     """
-
-    # nested dicts containing level 1: available loss functions,
+    # nested dicts containing level 1: available loss functions,
     # level2: available penalties for the given loss functin,
     # level3: wether the dual solver is available for the specified
    # combination of loss function and penalty
     _solver_type_dict = {
         'logistic_regression': {
             'l1': {False: 6},
             'l2': {False: 0, True: 7}},
-        'hinge' : {
-            'l2' : {True: 3}},
+        'hinge': {
+            'l2': {True: 3}},
         'squared_hinge': {
-            'l1': {False : 5},
+            'l1': {False: 5},
             'l2': {False: 2, True: 1}},
         'epsilon_insensitive': {
             'l2': {True: 13}},
         'squared_epsilon_insensitive': {
             'l2': {False: 11, True: 12}},
         'crammer_singer': 4
     }
-
 
     if multi_class == 'crammer_singer':
         return _solver_type_dict[multi_class]
     elif multi_class != 'ovr':
         raise ValueError("`multi_class` must be one of `ovr`, "
                          "`crammer_singer`, got %r" % multi_class)
 
-    _solver_pen = _solver_type_dict.get(loss, None)
+    # FIXME loss.lower() --> loss in 0.18
+    _solver_pen = _solver_type_dict.get(loss.lower(), None)
     if _solver_pen is None:
-        error_string = ("Loss %s is not supported" % loss)
+        error_string = ("loss='%s' is not supported" % loss)
     else:
-        _solver_dual = _solver_pen.get(penalty, None)
+        # FIME penalty.lower() --> penalty in 0.18
+        _solver_dual = _solver_pen.get(penalty.lower(), None)
        if _solver_dual is None:
             error_string = ("The combination of penalty='%s'"
                             "and loss='%s' is not supported"
-                            % (loss, penalty))
+                            % (penalty, loss))
        else:
             solver_num = _solver_dual.get(dual, None)
             if solver_num is None:
                 error_string = ("loss='%s' and penalty='%s'"
                                 "are not supported when dual=%s"
-                                % (loss, penalty, dual))
+                                % (penalty, loss, dual))
             else:
                 return solver_num
-    raise ValueError('Unsupported set of arguments: %s, '
-                     'Parameters: penalty=%r, loss=%r, dual=%r'
-                     % (error_string, penalty, loss, dual))
-    return _solver_type_dict[solver_type]
+
+    raise ValueError(('Unsupported set of arguments: %s, '
+                      'Parameters: penalty=%r, loss=%r, dual=%r')
+                     % (error_string, penalty, loss, dual))
 
 
 def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
                    penalty, dual, verbose, max_iter, tol,
-                   random_state=None, multi_class='ovr',
+                   random_state=None, multi_class='ovr',
                    loss='logistic_regression', epsilon=0.1):
     """Used by Logistic Regression (and CV) and LinearSVC.
 
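_get_liblinear_solver_type resolves the liblinear solver id by three successive lookups: loss, then penalty, then the dual flag. A minimal standalone sketch of that lookup (mirroring the table in the diff above; not the library code itself):

    # Sketch of the loss -> penalty -> dual -> solver-id lookup.
    solver_type_dict = {
        'logistic_regression': {'l1': {False: 6}, 'l2': {False: 0, True: 7}},
        'hinge': {'l2': {True: 3}},
        'squared_hinge': {'l1': {False: 5}, 'l2': {False: 2, True: 1}},
        'epsilon_insensitive': {'l2': {True: 13}},
        'squared_epsilon_insensitive': {'l2': {False: 11, True: 12}},
    }

    def solver_id(loss, penalty, dual):
        """Return the liblinear solver id, or None if the combination is invalid."""
        return solver_type_dict.get(loss.lower(), {}).get(penalty.lower(), {}).get(dual)

    print(solver_id('squared_hinge', 'l1', False))  # 5, used by LinearSVC(penalty='l1', dual=False)
    print(solver_id('hinge', 'l1', True))           # None: hinge loss with l1 penalty is unsupported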
@@ -722,7 +721,7 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
         If `crammer_singer` is chosen, the options loss, penalty and dual will
         be ignored.
 
-    loss : str, {'logistic_regression', 'hinge', 'squared_hinge',
+    loss : str, {'logistic_regression', 'hinge', 'squared_hinge',
                  'epsilon_insensitive', 'squared_epsilon_insensitive}
         The loss function used to fit the model.
 
@@ -743,7 +742,23 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
     n_iter_ : int
         Maximum number of iterations run across all classes.
     """
-    if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
+    # FIXME Remove case insensitivity in 0.18 ---------------------
+    loss_l, penalty_l = loss.lower(), penalty.lower()
+
+    msg = ("loss='%s' has been deprecated in favor of "
+           "loss='%s' as of 0.16. Backward compatibility"
+           " for the uppercase notation will be removed in %s")
+    if (not loss.islower()) and loss_l not in ('l1', 'l2'):
+        warnings.warn(msg % (loss, loss_l, "0.18"),
+                      DeprecationWarning)
+    if not penalty.islower():
+        warnings.warn(msg.replace("loss", "penalty")
+                      % (penalty, penalty_l, "0.18"),
+                      DeprecationWarning)
+    # -------------------------------------------------------------
+
+    # FIXME loss_l --> loss in 0.18
+    if loss_l not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
         enc = LabelEncoder()
         y_ind = enc.fit_transform(y)
         classes_ = enc.classes_
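The shim added to _fit_liblinear keeps the uppercase spellings working for one more release while emitting a DeprecationWarning before lowercasing them. A hedged standalone sketch of that pattern, using a hypothetical _normalise_penalty helper rather than the library internals:

    import warnings

    def _normalise_penalty(penalty):
        # Mirrors the shim above: accept 'L1'/'L2' for now, warn, and lowercase.
        if not penalty.islower():
            warnings.warn("penalty='%s' has been deprecated in favor of "
                          "penalty='%s' as of 0.16. Backward compatibility "
                          "for the uppercase notation will be removed in 0.18"
                          % (penalty, penalty.lower()), DeprecationWarning)
        return penalty.lower()

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        print(_normalise_penalty('L1'))      # 'l1'
        print(caught[0].category.__name__)   # DeprecationWarning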
@@ -772,7 +787,7 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
     # LibLinear wants targets as doubles, even for classification
     y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
     solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
-    raw_coef_, n_iter_ = liblinear.train_wrap(
+    raw_coef_, n_iter_ = liblinear.train_wrap(
         X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
         class_weight_, max_iter, rnd.randint(np.iinfo('i').max),
         epsilon

sklearn/svm/classes.py

Lines changed: 49 additions & 21 deletions
@@ -28,7 +28,7 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin,
 
     loss : string, 'hinge' or 'squared_hinge' (default='squared_hinge')
         Specifies the loss function. 'hinge' is the standard SVM loss
-        (used e.g. by the SVC class) while 'squared_hinge' is the
+        (used e.g. by the SVC class) while 'squared_hinge' is the
         square of the hinge loss.
 
     penalty : string, 'l1' or 'l2' (default='l2')
@@ -143,11 +143,10 @@ class frequencies.
 
     """
 
-    def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4, C=1.0,
-                 multi_class='ovr', fit_intercept=True, intercept_scaling=1,
-                 class_weight=None, verbose=0, random_state=None, max_iter=1000):
-        self.penalty = penalty
-        self.loss = loss
+    def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4,
+                 C=1.0, multi_class='ovr', fit_intercept=True,
+                 intercept_scaling=1, class_weight=None, verbose=0,
+                 random_state=None, max_iter=1000):
         self.dual = dual
         self.tol = tol
         self.C = C
@@ -158,6 +157,8 @@ def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4, C=1.
         self.verbose = verbose
         self.random_state = random_state
         self.max_iter = max_iter
+        self.penalty = penalty
+        self.loss = loss
 
     def fit(self, X, y):
         """Fit the model according to the given training data.
@@ -176,26 +177,34 @@ def fit(self, X, y):
         self : object
             Returns self.
         """
+        # FIXME Remove l1/l2 support in 1.0 -----------------------------------
+        loss_l = self.loss.lower()
+
+        msg = ("loss='%s' has been deprecated in favor of "
+               "loss='%s' as of 0.16. Backward compatibility"
+               " for the loss='%s' will be removed in %s")
+
+        # FIXME change loss_l --> self.loss after 0.18
+        if loss_l in ('l1', 'l2'):
+            old_loss = self.loss
+            self.loss = {'l1': 'hinge', 'l2': 'squared_hinge'}.get(loss_l)
+            warnings.warn(msg % (old_loss, self.loss, old_loss, '1.0'),
+                          DeprecationWarning)
+        # ---------------------------------------------------------------------
+
         if self.C < 0:
             raise ValueError("Penalty term must be positive; got (C=%r)"
                              % self.C)
 
-        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C")
+        X, y = check_X_y(X, y, accept_sparse='csr',
+                         dtype=np.float64, order="C")
         self.classes_ = np.unique(y)
 
-        if self.loss in ('l1', 'l2'):
-            # convert for backwards compatibility
-            loss = {'l1': 'hinge', 'l2': 'squared_hinge'}.get(self.loss)
-            warnings.warn("loss='l1' (resp. loss='l2') is deprecated and will" +
-                          "be removed before version 1.0. Please use loss='hinge'" +
-                          "(resp. loss='squared_hinge') instead", DeprecationWarning)
-        else:
-            loss = self.loss
         self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
             X, y, self.C, self.fit_intercept, self.intercept_scaling,
             self.class_weight, self.penalty, self.dual, self.verbose,
             self.max_iter, self.tol, self.random_state, self.multi_class,
-            loss
+            self.loss
         )
 
         if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
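For callers, the net effect is that LinearSVC still accepts the old loss aliases but rewrites them and warns inside fit. A rough check of that behaviour, assuming scikit-learn 0.16 (later releases drop the aliases entirely):

    import warnings
    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC

    X, y = make_classification(n_samples=50, n_features=10, random_state=0)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        clf = LinearSVC(loss='l2').fit(X, y)   # old alias for 'squared_hinge'

    print(clf.loss)  # 'squared_hinge' after the rewrite in fit() on 0.16
    print(any(w.category is DeprecationWarning for w in caught))  # True on 0.16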
@@ -223,7 +232,7 @@ class LinearSVR(LinearModel, RegressorMixin):
         Penalty parameter C of the error term. The penalty is a squared
         l2 penalty. The bigger this parameter, the less regularization is used.
 
-    loss : string, 'epsilon_insensitive' or 'squared_epsilon_insensitive'
+    loss : string, 'epsilon_insensitive' or 'squared_epsilon_insensitive'
         (default='epsilon_insensitive')
         Specifies the loss function. 'l1' is the epsilon-insensitive loss
         (standard SVR) while 'l2' is the squared epsilon-insensitive loss.
@@ -300,8 +309,9 @@ class LinearSVR(LinearModel, RegressorMixin):
     various loss functions and regularization regimes.
     """
 
-    def __init__(self, epsilon=0.0, tol=1e-4, C=1.0, loss='epsilon_insensitive',
-                 fit_intercept=True, intercept_scaling=1., dual=True, verbose=0,
+    def __init__(self, epsilon=0.0, tol=1e-4, C=1.0,
+                 loss='epsilon_insensitive', fit_intercept=True,
+                 intercept_scaling=1., dual=True, verbose=0,
                  random_state=None, max_iter=1000):
         self.tol = tol
         self.C = C
@@ -331,12 +341,30 @@ def fit(self, X, y):
         self : object
             Returns self.
         """
+        # FIXME Remove l1/l2 support in 1.0 -----------------------------------
+        loss_l = self.loss.lower()
+
+        msg = ("loss='%s' has been deprecated in favor of "
+               "loss='%s' as of 0.16. Backward compatibility"
+               " for the loss='%s' will be removed in %s")
+
+        # FIXME change loss_l --> self.loss after 0.18
+        if loss_l in ('l1', 'l2'):
+            old_loss = self.loss
+            self.loss = {'l1': 'epsilon_insensitive',
+                         'l2': 'squared_epsilon_insensitive'
+                         }.get(loss_l)
+            warnings.warn(msg % (old_loss, self.loss, old_loss, '1.0'),
+                          DeprecationWarning)
+        # ---------------------------------------------------------------------
+
         if self.C < 0:
             raise ValueError("Penalty term must be positive; got (C=%r)"
                              % self.C)
 
-        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C")
-        penalty = 'l2'  # SVR only accepts L2 penalty
+        X, y = check_X_y(X, y, accept_sparse='csr',
+                         dtype=np.float64, order="C")
+        penalty = 'l2'  # SVR only accepts l2 penalty
         self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
             X, y, self.C, self.fit_intercept, self.intercept_scaling,
             None, penalty, self.dual, self.verbose,
0 commit comments
