Refactoring in svm module. · scikit-learn/scikit-learn@138e688 · GitHub

Commit 138e688

Fabian Pedregosa committed
Refactoring in svm module.
Renaming and module de-nesting. This is just a refactoring of the low-level routines to ease use of the low-level API; the higher-level API was not changed.
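For code that imported the private binding modules directly, the change amounts to dropping the leading underscore. A minimal before/after sketch, assuming the scikits.learn package layout of this era:

    # before this commit (private, underscore-prefixed binding modules):
    #   from scikits.learn.svm import _liblinear, _libsvm
    # after this commit, the same routines live under public module names:
    from scikits.learn.svm import liblinear, libsvm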
1 parent 4699607 commit 138e688

18 files changed: +7658 −811 lines

scikits/learn/linear_model/logistic.py

Lines changed: 2 additions & 2 deletions

@@ -3,7 +3,7 @@
 from ..base import ClassifierMixin
 from ..linear_model.base import CoefSelectTransformerMixin
 from ..svm.base import BaseLibLinear
-from ..svm import _liblinear
+from ..svm import liblinear
 
 class LogisticRegression(BaseLibLinear, ClassifierMixin,
                          CoefSelectTransformerMixin):
@@ -97,7 +97,7 @@ def predict_proba(self, X):
             order.
         """
         X = np.asanyarray(X, dtype=np.float64, order='C')
-        probas = _liblinear.predict_prob_wrap(X, self.raw_coef_,
+        probas = liblinear.predict_prob_wrap(X, self.raw_coef_,
                                               self._get_solver_type(),
                                               self.tol, self.C,
                                               self.class_weight_label,

scikits/learn/linear_model/sparse/logistic.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@
 from ...base import ClassifierMixin
 from ...svm.sparse.base import SparseBaseLibLinear
 from ...linear_model.sparse.base import CoefSelectTransformerMixin
-from ...svm._liblinear import csr_predict_prob
+from ...svm.liblinear import csr_predict_prob
 
 class LogisticRegression(SparseBaseLibLinear, ClassifierMixin,
                          CoefSelectTransformerMixin):

scikits/learn/svm/__init__.py

Lines changed: 2 additions & 3 deletions

@@ -10,6 +10,5 @@
 License: New BSD, (C) INRIA 2010
 """
 
-from .libsvm import SVC, NuSVC, SVR, NuSVR, OneClassSVM
-from .liblinear import LinearSVC
-from . import sparse
+from .classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC
+from . import sparse, libsvm, liblinear
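The net effect on the package's import surface, as a sketch: the estimator classes keep their old names, while the binding modules become public submodules.

    # high-level API, unchanged by the refactoring:
    from scikits.learn.svm import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC
    # low-level bindings, newly importable without the underscore:
    from scikits.learn.svm import libsvm, liblinear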

scikits/learn/svm/base.py

Lines changed: 9 additions & 11 deletions

@@ -1,8 +1,6 @@
 import numpy as np
 
-from ._libsvm import libsvm_train, libsvm_predict, libsvm_predict_proba, \
-    libsvm_decision_function, set_verbosity_wrap
-from . import _liblinear
+from . import libsvm, liblinear
 from ..base import BaseEstimator
 
 
@@ -142,7 +140,7 @@ def fit(self, X, y, class_weight={}, sample_weight=[], **params):
         self.support_, self.support_vectors_, self.n_support_, \
         self.dual_coef_, self.intercept_, self.label_, self.probA_, \
         self.probB_ = \
-            libsvm_train(_X, y, solver_type, kernel_type, self.degree,
+            libsvm.libsvm_train(_X, y, solver_type, kernel_type, self.degree,
                          self.gamma, self.coef0, self.tol, self.C,
                          self.nu, self.cache_size, self.p,
                          self.class_weight_label, self.class_weight,
@@ -182,7 +180,7 @@ def predict(self, X):
             raise ValueError("X.shape[1] should be equal to the number of "
                              "features at training time!")
 
-        return libsvm_predict(X, self.support_vectors_,
+        return libsvm.libsvm_predict(X, self.support_vectors_,
                               self.dual_coef_, self.intercept_,
                               self._svm_types.index(self.impl), kernel_type,
                               self.degree, self.gamma, self.coef0, self.tol,
@@ -224,7 +222,7 @@ def predict_proba(self, T):
         if self.impl not in ('c_svc', 'nu_svc'):
             raise NotImplementedError
 
-        pprob = libsvm_predict_proba(T, self.support_vectors_,
+        pprob = libsvm.libsvm_predict_proba(T, self.support_vectors_,
                                      self.dual_coef_, self.intercept_,
                                      self._svm_types.index(self.impl), kernel_type,
                                      self.degree, self.gamma, self.coef0, self.tol,
@@ -279,7 +277,7 @@ def decision_function(self, T):
         T = np.atleast_2d(np.asanyarray(T, dtype=np.float64, order='C'))
         kernel_type, T = self._get_kernel(T)
 
-        dec_func = libsvm_decision_function(T, self.support_vectors_,
+        dec_func = libsvm.libsvm_decision_function(T, self.support_vectors_,
                                             self.dual_coef_, self.intercept_,
                                             self._svm_types.index(self.impl), kernel_type,
                                             self.degree, self.gamma, self.coef0, self.tol,
@@ -378,7 +376,7 @@ def fit(self, X, y, class_weight={}, **params):
         X = np.asanyarray(X, dtype=np.float64, order='C')
         y = np.asanyarray(y, dtype=np.int32, order='C')
 
-        self.raw_coef_, self.label_ = _liblinear.train_wrap(X, y,
+        self.raw_coef_, self.label_ = liblinear.train_wrap(X, y,
                                        self._get_solver_type(), self.tol,
                                        self._get_bias(), self.C,
                                        self.class_weight_label, self.class_weight)
@@ -402,7 +400,7 @@ def predict(self, X):
 
         coef = self.raw_coef_
 
-        return _liblinear.predict_wrap(X, coef,
+        return liblinear.predict_wrap(X, coef,
                                        self._get_solver_type(),
                                        self.tol, self.C,
                                        self.class_weight_label,
@@ -427,7 +425,7 @@ def decision_function(self, X):
         X = np.atleast_2d(np.asanyarray(X, dtype=np.float64, order='C'))
         self._check_n_features(X)
 
-        dec_func = _liblinear.decision_function_wrap(
+        dec_func = liblinear.decision_function_wrap(
             X, self.raw_coef_, self._get_solver_type(), self.tol,
             self.C, self.class_weight_label, self.class_weight,
             self.label_, self._get_bias())
@@ -479,4 +477,4 @@ def _get_bias(self):
         return -1.0
 
 
-set_verbosity_wrap(0)
+libsvm.set_verbosity_wrap(0)
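The commit message's point about easing use of the low-level API shows up in the last hunk: module-qualified calls replace the old backslash-continued imports. A tiny sketch of direct use, restricted to the one call whose full signature this diff shows:

    from scikits.learn.svm import libsvm
    # silence libsvm's console output during training,
    # exactly as base.py now does at import time
    libsvm.set_verbosity_wrap(0)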

scikits/learn/svm/libsvm.py renamed to scikits/learn/svm/classes.py

Lines changed: 75 additions & 2 deletions

@@ -1,6 +1,79 @@
-
 from ..base import ClassifierMixin, RegressorMixin
-from .base import BaseLibSVM
+from ..linear_model.base import CoefSelectTransformerMixin
+from .base import BaseLibLinear, BaseLibSVM
+
+
+class LinearSVC(BaseLibLinear, ClassifierMixin, CoefSelectTransformerMixin):
+    """Linear Support Vector Classification.
+
+    Similar to SVC with parameter kernel='linear', but uses liblinear
+    internally rather than libsvm, so it has more flexibility in the
+    choice of penalties and loss functions and should be faster for
+    huge datasets.
+
+    Parameters
+    ----------
+    loss : string, 'l1' or 'l2' (default 'l2')
+        Specifies the loss function. With 'l1' it is the standard SVM
+        loss (a.k.a. hinge loss), while with 'l2' it is the squared loss
+        (a.k.a. squared hinge loss).
+
+    penalty : string, 'l1' or 'l2' (default 'l2')
+        Specifies the norm used in the penalization. The 'l2'
+        penalty is the standard used in SVC. The 'l1' leads to coef_
+        vectors that are sparse.
+
+    dual : bool (default True)
+        Select the algorithm to either solve the dual or primal
+        optimization problem.
+
+    tol : float, optional
+        Tolerance for the stopping criteria.
+
+    multi_class : boolean, optional
+        Perform multi-class SVM as proposed by Crammer and Singer. If
+        active, the options loss, penalty and dual will be ignored.
+
+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector. The intercept becomes
+        intercept_scaling * synthetic feature weight.
+        Note! the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.
+
+    Attributes
+    ----------
+    `coef_` : array, shape = [n_features] if n_classes == 2 else [n_classes, n_features]
+        Weights assigned to the features (coefficients in the primal
+        problem). This is only available in the case of a linear kernel.
+
+    `intercept_` : array, shape = [1] if n_classes == 2 else [n_classes]
+        Constants in the decision function.
+
+    Notes
+    -----
+    The underlying C implementation uses a random number generator to
+    select features when fitting the model. It is thus not uncommon
+    to have slightly different results for the same input data. If
+    that happens, try with a smaller tol parameter.
+
+    See also
+    --------
+    SVC
+
+    References
+    ----------
+    LIBLINEAR -- A Library for Large Linear Classification
+    http://www.csie.ntu.edu.tw/~cjlin/liblinear/
+
+    """
+
+    # all the implementation is provided by the mixins
+    pass
 
 
 class SVC(BaseLibSVM, ClassifierMixin):
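Since LinearSVC's implementation is provided entirely by its mixins, using it is the usual estimator workflow. A hypothetical usage sketch (the toy data is invented for illustration; the class name and defaults come from the docstring above):

    import numpy as np
    from scikits.learn.svm import LinearSVC

    # two separable clusters, labels 0 and 1 (made-up data)
    X = np.array([[-2., -1.], [-1., -2.], [1., 2.], [2., 1.]])
    y = np.array([0, 0, 1, 1])

    clf = LinearSVC().fit(X, y)     # defaults: penalty='l2', loss='l2', dual=True
    print(clf.coef_)                # primal weights, shape [n_features] for 2 classes
    print(clf.predict([[3., 3.]]))  # expected: array([1])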
