Merge pull request #4261 from ragv/svm_scale_c_plot · ogrisel/scikit-learn@a926626 · GitHub


Commit a926626

committed
Merge pull request scikit-learn#4261 from ragv/svm_scale_c_plot
[MRG+1] Broken svm scale C example
2 parents 3f5277e + 6d222fd commit a926626

File tree

4 files changed (+188 −72 lines changed)

examples/svm/plot_svm_scale_c.py

Lines changed: 12 additions & 12 deletions
@@ -38,19 +38,19 @@
 
 The figures below are used to illustrate the effect of scaling our
 `C` to compensate for the change in the number of samples, in the
-case of using an `L1` penalty, as well as the `L2` penalty.
+case of using an `l1` penalty, as well as the `l2` penalty.
 
-L1-penalty case
+l1-penalty case
 -----------------
-In the `L1` case, theory says that prediction consistency
+In the `l1` case, theory says that prediction consistency
 (i.e. that under given hypothesis, the estimator
 learned predicts as well as a model knowing the true distribution)
-is not possible because of the bias of the `L1`. It does say, however,
+is not possible because of the bias of the `l1`. It does say, however,
 that model consistency, in terms of finding the right set of non-zero
 parameters as well as their signs, can be achieved by scaling
 `C1`.
 
-L2-penalty case
+l2-penalty case
 -----------------
 The theory says that in order to achieve prediction consistency, the
 penalty parameter should be kept constant
@@ -63,17 +63,17 @@
 corresponding cross-validation scores on the `y-axis`, for several different
 fractions of a generated data-set.
 
-In the `L1` penalty case, the cross-validation-error correlates best with
+In the `l1` penalty case, the cross-validation-error correlates best with
 the test-error, when scaling our `C` with the number of samples, `n`,
 which can be seen in the first figure.
 
-For the `L2` penalty case, the best result comes from the case where `C`
+For the `l2` penalty case, the best result comes from the case where `C`
 is not scaled.
 
 .. topic:: Note:
 
     Two separate datasets are used for the two different plots. The reason
-    behind this is the `L1` case works better on sparse data, while `L2`
+    behind this is the `l1` case works better on sparse data, while `l2`
     is better suited to the non-sparse case.
 """
 print(__doc__)
@@ -100,20 +100,20 @@
 n_samples = 100
 n_features = 300
 
-# L1 data (only 5 informative features)
+# l1 data (only 5 informative features)
 X_1, y_1 = datasets.make_classification(n_samples=n_samples,
                                         n_features=n_features, n_informative=5,
                                         random_state=1)
 
-# L2 data: non sparse, but less features
+# l2 data: non sparse, but less features
 y_2 = np.sign(.5 - rnd.rand(n_samples))
 X_2 = rnd.randn(n_samples, n_features / 5) + y_2[:, np.newaxis]
 X_2 += 5 * rnd.randn(n_samples, n_features / 5)
 
-clf_sets = [(LinearSVC(penalty='L1', loss='L2', dual=False,
+clf_sets = [(LinearSVC(penalty='l1', loss='squared_hinge', dual=False,
                        tol=1e-3),
              np.logspace(-2.3, -1.3, 10), X_1, y_1),
-            (LinearSVC(penalty='L2', loss='L2', dual=True,
+            (LinearSVC(penalty='l2', loss='squared_hinge', dual=True,
                        tol=1e-4),
              np.logspace(-4.5, -2, 10), X_2, y_2)]
 

sklearn/svm/base.py

Lines changed: 40 additions & 25 deletions
@@ -7,15 +7,14 @@
 
 from . import libsvm, liblinear
 from . import libsvm_sparse
-from ..base import BaseEstimator, ClassifierMixin, RegressorMixin
+from ..base import BaseEstimator, ClassifierMixin
 from ..preprocessing import LabelEncoder
 from ..utils import check_array, check_random_state, column_or_1d
 from ..utils import ConvergenceWarning, compute_class_weight
 from ..utils.extmath import safe_sparse_dot
 from ..utils.validation import check_is_fitted
 from ..externals import six
 
-
 LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr']
 
 
@@ -70,7 +69,7 @@ def __init__(self, impl, kernel, degree, gamma, coef0,
                  tol, C, nu, epsilon, shrinking, probability, cache_size,
                  class_weight, verbose, max_iter, random_state):
 
-        if not impl in LIBSVM_IMPL:  # pragma: no cover
+        if impl not in LIBSVM_IMPL:  # pragma: no cover
            raise ValueError("impl should be one of %s, %s was given" % (
                LIBSVM_IMPL, impl))
 
@@ -384,7 +383,7 @@ def decision_function(self, X):
 
     def _validate_for_predict(self, X):
         check_is_fitted(self, 'support_')
-
+
         X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")
         if self._sparse and not sp.isspmatrix(X):
             X = sp.csr_matrix(X)
@@ -604,63 +603,63 @@ def _get_liblinear_solver_type(multi_class, penalty, loss, dual):
     - loss
     - dual
 
-    The same number is internally by LibLinear to determine which
-    solver to use.
+    The same number is also internally used by LibLinear to determine
+    which solver to use.
     """
-
-    # nested dicts containing level 1: available loss functions,
+    # nested dicts containing level 1: available loss functions,
     # level2: available penalties for the given loss functin,
     # level3: wether the dual solver is available for the specified
    # combination of loss function and penalty
     _solver_type_dict = {
         'logistic_regression': {
             'l1': {False: 6},
             'l2': {False: 0, True: 7}},
-        'hinge' : {
-            'l2' : {True: 3}},
+        'hinge': {
+            'l2': {True: 3}},
         'squared_hinge': {
-            'l1': {False : 5},
+            'l1': {False: 5},
             'l2': {False: 2, True: 1}},
         'epsilon_insensitive': {
             'l2': {True: 13}},
         'squared_epsilon_insensitive': {
             'l2': {False: 11, True: 12}},
         'crammer_singer': 4
     }
-
 
     if multi_class == 'crammer_singer':
         return _solver_type_dict[multi_class]
     elif multi_class != 'ovr':
         raise ValueError("`multi_class` must be one of `ovr`, "
                          "`crammer_singer`, got %r" % multi_class)
 
-    _solver_pen = _solver_type_dict.get(loss, None)
+    # FIXME loss.lower() --> loss in 0.18
+    _solver_pen = _solver_type_dict.get(loss.lower(), None)
     if _solver_pen is None:
-        error_string = ("Loss %s is not supported" % loss)
+        error_string = ("loss='%s' is not supported" % loss)
     else:
-        _solver_dual = _solver_pen.get(penalty, None)
+        # FIME penalty.lower() --> penalty in 0.18
+        _solver_dual = _solver_pen.get(penalty.lower(), None)
        if _solver_dual is None:
             error_string = ("The combination of penalty='%s'"
                             "and loss='%s' is not supported"
-                            % (loss, penalty))
+                            % (penalty, loss))
        else:
             solver_num = _solver_dual.get(dual, None)
             if solver_num is None:
                 error_string = ("loss='%s' and penalty='%s'"
                                 "are not supported when dual=%s"
-                                % (loss, penalty, dual))
+                                % (penalty, loss, dual))
             else:
                 return solver_num
-    raise ValueError('Unsupported set of arguments: %s, '
-                     'Parameters: penalty=%r, loss=%r, dual=%r'
-                     % (error_string, penalty, loss, dual))
-    return _solver_type_dict[solver_type]
+
+    raise ValueError(('Unsupported set of arguments: %s, '
+                      'Parameters: penalty=%r, loss=%r, dual=%r')
+                     % (error_string, penalty, loss, dual))
 
 
 def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
                    penalty, dual, verbose, max_iter, tol,
-                   random_state=None, multi_class='ovr',
+                   random_state=None, multi_class='ovr',
                    loss='logistic_regression', epsilon=0.1):
     """Used by Logistic Regression (and CV) and LinearSVC.
 
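_get_liblinear_solver_type resolves the liblinear solver id by three successive lookups: loss, then penalty, then the dual flag. A minimal standalone sketch of that lookup (mirroring the table in the diff above; not the library code itself):

    # Sketch of the loss -> penalty -> dual -> solver-id lookup.
    solver_type_dict = {
        'logistic_regression': {'l1': {False: 6}, 'l2': {False: 0, True: 7}},
        'hinge': {'l2': {True: 3}},
        'squared_hinge': {'l1': {False: 5}, 'l2': {False: 2, True: 1}},
        'epsilon_insensitive': {'l2': {True: 13}},
        'squared_epsilon_insensitive': {'l2': {False: 11, True: 12}},
    }

    def solver_id(loss, penalty, dual):
        """Return the liblinear solver id, or None if the combination is invalid."""
        return solver_type_dict.get(loss.lower(), {}).get(penalty.lower(), {}).get(dual)

    print(solver_id('squared_hinge', 'l1', False))  # 5, used by LinearSVC(penalty='l1', dual=False)
    print(solver_id('hinge', 'l1', True))           # None: hinge loss with l1 penalty is unsupported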
@@ -722,7 +721,7 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
         If `crammer_singer` is chosen, the options loss, penalty and dual will
         be ignored.
 
-    loss : str, {'logistic_regression', 'hinge', 'squared_hinge',
+    loss : str, {'logistic_regression', 'hinge', 'squared_hinge',
                  'epsilon_insensitive', 'squared_epsilon_insensitive}
         The loss function used to fit the model.
 
@@ -743,7 +742,23 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
     n_iter_ : int
         Maximum number of iterations run across all classes.
     """
-    if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
+    # FIXME Remove case insensitivity in 0.18 ---------------------
+    loss_l, penalty_l = loss.lower(), penalty.lower()
+
+    msg = ("loss='%s' has been deprecated in favor of "
+           "loss='%s' as of 0.16. Backward compatibility"
+           " for the uppercase notation will be removed in %s")
+    if (not loss.islower()) and loss_l not in ('l1', 'l2'):
+        warnings.warn(msg % (loss, loss_l, "0.18"),
+                      DeprecationWarning)
+    if not penalty.islower():
+        warnings.warn(msg.replace("loss", "penalty")
+                      % (penalty, penalty_l, "0.18"),
+                      DeprecationWarning)
+    # -------------------------------------------------------------
+
+    # FIXME loss_l --> loss in 0.18
+    if loss_l not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
         enc = LabelEncoder()
         y_ind = enc.fit_transform(y)
         classes_ = enc.classes_
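The shim added to _fit_liblinear keeps the uppercase spellings working for one more release while emitting a DeprecationWarning before lowercasing them. A hedged standalone sketch of that pattern, using a hypothetical _normalise_penalty helper rather than the library internals:

    import warnings

    def _normalise_penalty(penalty):
        # Mirrors the shim above: accept 'L1'/'L2' for now, warn, and lowercase.
        if not penalty.islower():
            warnings.warn("penalty='%s' has been deprecated in favor of "
                          "penalty='%s' as of 0.16. Backward compatibility "
                          "for the uppercase notation will be removed in 0.18"
                          % (penalty, penalty.lower()), DeprecationWarning)
        return penalty.lower()

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        print(_normalise_penalty('L1'))      # 'l1'
        print(caught[0].category.__name__)   # DeprecationWarning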
@@ -772,7 +787,7 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
     # LibLinear wants targets as doubles, even for classification
     y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
     solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
-    raw_coef_, n_iter_ = liblinear.train_wrap(
+    raw_coef_, n_iter_ = liblinear.train_wrap(
         X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
         class_weight_, max_iter, rnd.randint(np.iinfo('i').max),
         epsilon

sklearn/svm/classes.py

Lines changed: 49 additions & 21 deletions
@@ -28,7 +28,7 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin,
 
     loss : string, 'hinge' or 'squared_hinge' (default='squared_hinge')
         Specifies the loss function. 'hinge' is the standard SVM loss
-        (used e.g. by the SVC class) while 'squared_hinge' is the
+        (used e.g. by the SVC class) while 'squared_hinge' is the
         square of the hinge loss.
 
     penalty : string, 'l1' or 'l2' (default='l2')
@@ -143,11 +143,10 @@ class frequencies.
 
     """
 
-    def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4, C=1.0,
-                 multi_class='ovr', fit_intercept=True, intercept_scaling=1,
-                 class_weight=None, verbose=0, random_state=None, max_iter=1000):
-        self.penalty = penalty
-        self.loss = loss
+    def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4,
+                 C=1.0, multi_class='ovr', fit_intercept=True,
+                 intercept_scaling=1, class_weight=None, verbose=0,
+                 random_state=None, max_iter=1000):
         self.dual = dual
         self.tol = tol
         self.C = C
@@ -158,6 +157,8 @@ def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4, C=1.
         self.verbose = verbose
         self.random_state = random_state
         self.max_iter = max_iter
+        self.penalty = penalty
+        self.loss = loss
 
     def fit(self, X, y):
         """Fit the model according to the given training data.
@@ -176,26 +177,34 @@ def fit(self, X, y):
         self : object
             Returns self.
         """
+        # FIXME Remove l1/l2 support in 1.0 -----------------------------------
+        loss_l = self.loss.lower()
+
+        msg = ("loss='%s' has been deprecated in favor of "
+               "loss='%s' as of 0.16. Backward compatibility"
+               " for the loss='%s' will be removed in %s")
+
+        # FIXME change loss_l --> self.loss after 0.18
+        if loss_l in ('l1', 'l2'):
+            old_loss = self.loss
+            self.loss = {'l1': 'hinge', 'l2': 'squared_hinge'}.get(loss_l)
+            warnings.warn(msg % (old_loss, self.loss, old_loss, '1.0'),
+                          DeprecationWarning)
+        # ---------------------------------------------------------------------
+
         if self.C < 0:
             raise ValueError("Penalty term must be positive; got (C=%r)"
                              % self.C)
 
-        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C")
+        X, y = check_X_y(X, y, accept_sparse='csr',
+                         dtype=np.float64, order="C")
         self.classes_ = np.unique(y)
 
-        if self.loss in ('l1', 'l2'):
-            # convert for backwards compatibility
-            loss = {'l1': 'hinge', 'l2': 'squared_hinge'}.get(self.loss)
-            warnings.warn("loss='l1' (resp. loss='l2') is deprecated and will" +
-                          "be removed before version 1.0. Please use loss='hinge'" +
-                          "(resp. loss='squared_hinge') instead", DeprecationWarning)
-        else:
-            loss = self.loss
         self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
             X, y, self.C, self.fit_intercept, self.intercept_scaling,
             self.class_weight, self.penalty, self.dual, self.verbose,
             self.max_iter, self.tol, self.random_state, self.multi_class,
-            loss
+            self.loss
         )
 
         if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
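For callers, the net effect is that LinearSVC still accepts the old loss aliases but rewrites them and warns inside fit. A rough check of that behaviour, assuming scikit-learn 0.16 (later releases drop the aliases entirely):

    import warnings
    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC

    X, y = make_classification(n_samples=50, n_features=10, random_state=0)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        clf = LinearSVC(loss='l2').fit(X, y)   # old alias for 'squared_hinge'

    print(clf.loss)  # 'squared_hinge' after the rewrite in fit() on 0.16
    print(any(w.category is DeprecationWarning for w in caught))  # True on 0.16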
@@ -223,7 +232,7 @@ class LinearSVR(LinearModel, RegressorMixin):
         Penalty parameter C of the error term. The penalty is a squared
         l2 penalty. The bigger this parameter, the less regularization is used.
 
-    loss : string, 'epsilon_insensitive' or 'squared_epsilon_insensitive'
+    loss : string, 'epsilon_insensitive' or 'squared_epsilon_insensitive'
         (default='epsilon_insensitive')
         Specifies the loss function. 'l1' is the epsilon-insensitive loss
         (standard SVR) while 'l2' is the squared epsilon-insensitive loss.
@@ -300,8 +309,9 @@ class LinearSVR(LinearModel, RegressorMixin):
     various loss functions and regularization regimes.
     """
 
-    def __init__(self, epsilon=0.0, tol=1e-4, C=1.0, loss='epsilon_insensitive',
-                 fit_intercept=True, intercept_scaling=1., dual=True, verbose=0,
+    def __init__(self, epsilon=0.0, tol=1e-4, C=1.0,
+                 loss='epsilon_insensitive', fit_intercept=True,
+                 intercept_scaling=1., dual=True, verbose=0,
                  random_state=None, max_iter=1000):
         self.tol = tol
         self.C = C
@@ -331,12 +341,30 @@ def fit(self, X, y):
         self : object
             Returns self.
         """
+        # FIXME Remove l1/l2 support in 1.0 -----------------------------------
+        loss_l = self.loss.lower()
+
+        msg = ("loss='%s' has been deprecated in favor of "
+               "loss='%s' as of 0.16. Backward compatibility"
+               " for the loss='%s' will be removed in %s")
+
+        # FIXME change loss_l --> self.loss after 0.18
+        if loss_l in ('l1', 'l2'):
+            old_loss = self.loss
+            self.loss = {'l1': 'epsilon_insensitive',
+                         'l2': 'squared_epsilon_insensitive'
+                         }.get(loss_l)
+            warnings.warn(msg % (old_loss, self.loss, old_loss, '1.0'),
+                          DeprecationWarning)
+        # ---------------------------------------------------------------------
+
         if self.C < 0:
             raise ValueError("Penalty term must be positive; got (C=%r)"
                              % self.C)
 
-        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C")
-        penalty = 'l2'  # SVR only accepts L2 penalty
+        X, y = check_X_y(X, y, accept_sparse='csr',
+                         dtype=np.float64, order="C")
+        penalty = 'l2'  # SVR only accepts l2 penalty
         self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
             X, y, self.C, self.fit_intercept, self.intercept_scaling,
             None, penalty, self.dual, self.verbose,
0 commit comments
