[WIP] gamma=auto in SVC #8361 by neokt · Pull Request #8535 · scikit-learn/scikit-learn · GitHub

[WIP] gamma=auto in SVC #8361 #8535


Closed · wants to merge 1 commit into from
6 changes: 3 additions & 3 deletions sklearn/ensemble/tests/test_bagging.py
@@ -68,7 +68,7 @@ def test_classification():
Perceptron(),
DecisionTreeClassifier(),
KNeighborsClassifier(),
- SVC()]:
+ SVC(gamma="scale")]:
for params in grid:
BaggingClassifier(base_estimator=base_estimator,
random_state=rng,
@@ -309,7 +309,7 @@ def test_oob_score_classification():
iris.target,
random_state=rng)

- for base_estimator in [DecisionTreeClassifier(), SVC()]:
+ for base_estimator in [DecisionTreeClassifier(), SVC(gamma="scale")]:
Review comment (Member):
I think we can use this PR to also set a random_state, to reduce diff of #8563.

It will anyway have a merge conflict for all these lines in that PR...

@jnothman Ok with you?

clf = BaggingClassifier(base_estimator=base_estimator,
n_estimators=100,
bootstrap=True,
@@ -493,7 +493,7 @@ def test_gridsearch():
parameters = {'n_estimators': (1, 2),
'base_estimator__C': (1, 2)}

- GridSearchCV(BaggingClassifier(SVC()),
+ GridSearchCV(BaggingClassifier(SVC(gamma="scale")),
parameters,
scoring="roc_auc").fit(X, y)

4 changes: 2 additions & 2 deletions sklearn/ensemble/tests/test_weight_boosting.py
@@ -287,7 +287,7 @@ def test_base_estimator():
clf = AdaBoostClassifier(RandomForestClassifier())
clf.fit(X, y_regr)

- clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
+ clf = AdaBoostClassifier(SVC(gamma="scale"), algorithm="SAMME")
clf.fit(X, y_class)

from sklearn.ensemble import RandomForestRegressor
@@ -302,7 +302,7 @@ def test_base_estimator():
# Check that an empty discrete ensemble fails in fit, not predict.
X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]]
y_fail = ["foo", "bar", 1, 2]
- clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
+ clf = AdaBoostClassifier(SVC(gamma="scale"), algorithm="SAMME")
assert_raises_regexp(ValueError, "worse than random",
clf.fit, X_fail, y_fail)

2 changes: 1 addition & 1 deletion sklearn/grid_search.py
@@ -743,7 +743,7 @@ class GridSearchCV(BaseSearchCV):
>>> from sklearn import svm, grid_search, datasets
>>> iris = datasets.load_iris()
>>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
- >>> svr = svm.SVC()
+ >>> svr = svm.SVC(gamma="scale")
>>> clf = grid_search.GridSearchCV(svr, parameters)
>>> clf.fit(iris.data, iris.target)
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
4 changes: 2 additions & 2 deletions sklearn/metrics/tests/test_score_objects.py
@@ -180,11 +180,11 @@ def test_check_scoring_gridsearchcv():
# test that check_scoring works on GridSearchCV and pipeline.
# slightly redundant non-regression test.

- grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
+ grid = GridSearchCV(LinearSVC(gamma="scale"), param_grid={'C': [.1, 1]})
scorer = check_scoring(grid, "f1")
assert_true(isinstance(scorer, _PredictScorer))

- pipe = make_pipeline(LinearSVC())
+ pipe = make_pipeline(LinearSVC(gamma="scale"))
scorer = check_scoring(pipe, "f1")
assert_true(isinstance(scorer, _PredictScorer))

2 changes: 1 addition & 1 deletion sklearn/model_selection/_search.py
@@ -814,7 +814,7 @@ class GridSearchCV(BaseSearchCV):
>>> from sklearn.model_selection import GridSearchCV
>>> iris = datasets.load_iris()
>>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
- >>> svr = svm.SVC()
+ >>> svr = svm.SVC(gamma="scale")
>>> clf = GridSearchCV(svr, parameters)
>>> clf.fit(iris.data, iris.target)
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
30 changes: 15 additions & 15 deletions sklearn/model_selection/tests/test_search.py
@@ -399,7 +399,7 @@ def test_grid_search_one_grid_point():
X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
param_dict = {"C": [1.0], "kernel": ["rbf"], "gamma": [0.1]}

- clf = SVC()
+ clf = SVC(gamma="scale")
cv = GridSearchCV(clf, param_dict)
cv.fit(X_, y_)

@@ -423,30 +423,30 @@ def test_grid_search_when_param_grid_includes_range():

def test_grid_search_bad_param_grid():
param_dict = {"C": 1.0}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raise_message(
ValueError,
"Parameter values for parameter (C) need to be a sequence"
"(but not a string) or np.ndarray.",
GridSearchCV, clf, param_dict)

param_dict = {"C": []}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raise_message(
ValueError,
"Parameter values for parameter (C) need to be a non-empty sequence.",
GridSearchCV, clf, param_dict)

param_dict = {"C": "1,2,3"}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raise_message(
ValueError,
"Parameter values for parameter (C) need to be a sequence"
"(but not a string) or np.ndarray.",
GridSearchCV, clf, param_dict)

param_dict = {"C": np.ones(6).reshape(3, 2)}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raises(ValueError, GridSearchCV, clf, param_dict)


@@ -742,10 +742,10 @@ def test_grid_search_cv_results():
n_grid_points = 6
params = [dict(kernel=['rbf', ], C=[1, 10], gamma=[0.1, 1]),
dict(kernel=['poly', ], degree=[1, 2])]
- grid_search = GridSearchCV(SVC(), cv=n_splits, iid=False,
+ grid_search = GridSearchCV(SVC(gamma="scale"), cv=n_splits, iid=False,
param_grid=params)
grid_search.fit(X, y)
- grid_search_iid = GridSearchCV(SVC(), cv=n_splits, iid=True,
+ grid_search_iid = GridSearchCV(SVC(gamma="scale"), cv=n_splits, iid=True,
param_grid=params)
grid_search_iid.fit(X, y)

@@ -802,11 +802,11 @@ def test_random_search_cv_results():
n_splits = 3
n_search_iter = 30
params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
- random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter,
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=n_search_iter,
cv=n_splits, iid=False,
param_distributions=params)
random_search.fit(X, y)
- random_search_iid = RandomizedSearchCV(SVC(), n_iter=n_search_iter,
+ random_search_iid = RandomizedSearchCV(SVC(gamma="scale"), n_iter=n_search_iter,
cv=n_splits, iid=True,
param_distributions=params)
random_search_iid.fit(X, y)
@@ -850,8 +850,8 @@ def test_search_iid_param():
# create "cv" for splits
cv = [[mask, ~mask], [~mask, mask]]
# once with iid=True (default)
- grid_search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=cv)
- random_search = RandomizedSearchCV(SVC(), n_iter=2,
+ grid_search = GridSearchCV(SVC(gamma="scale"), param_grid={'C': [1, 10]}, cv=cv)
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=2,
param_distributions={'C': [1, 10]},
cv=cv)
for search in (grid_search, random_search):
@@ -893,10 +893,10 @@ def test_search_iid_param():
assert_almost_equal(train_std, 0)

# once with iid=False
- grid_search = GridSearchCV(SVC(),
+ grid_search = GridSearchCV(SVC(gamma="scale"),
param_grid={'C': [1, 10]},
cv=cv, iid=False)
- random_search = RandomizedSearchCV(SVC(), n_iter=2,
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=2,
param_distributions={'C': [1, 10]},
cv=cv, iid=False)

@@ -936,8 +936,8 @@ def test_search_cv_results_rank_tie_breaking():
# which would result in a tie of their mean cv-scores
param_grid = {'C': [1, 1.001, 0.001]}

- grid_search = GridSearchCV(SVC(), param_grid=param_grid)
- random_search = RandomizedSearchCV(SVC(), n_iter=3,
+ grid_search = GridSearchCV(SVC(gamma="scale"), param_grid=param_grid)
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=3,
param_distributions=param_grid)

for search in (grid_search, random_search):
11 changes: 8 additions & 3 deletions sklearn/svm/base.py
@@ -168,9 +168,14 @@ def fit(self, X, y, sample_weight=None):
"%r vs %r\n"
"Note: Sparse matrices cannot be indexed w/"
"boolean masks (use `indices=True` in CV)."
% (sample_weight.shape, X.shape))

if self.gamma == 'auto':
% (sample_weight.shape, X.shape))

if self.gamma == 'scale':
self._gamma = 1.0 / (X.shape[1] * X.std())
elif self.gamma == 'auto':
warnings.warn("The default gamma parameter value 'auto', calculated as 1 / n_features,"
" is depreciated in version 0.19 and will be replaced by 'scale',"
" calculated as 1 / (n_features * X.std()) in version 0.21.", DeprecationWarning)
self._gamma = 1.0 / X.shape[1]
else:
self._gamma = self.gamma
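In effect, the patch resolves the public gamma parameter into the private _gamma attribute at fit time. A minimal standalone sketch of that resolution logic, assuming a dense 2-D numpy array X (the helper name resolve_gamma is ours, for illustration only):

import warnings

import numpy as np


def resolve_gamma(gamma, X):
    # Mirrors the branch added to BaseLibSVM.fit above (a sketch, not PR code).
    if gamma == 'scale':
        # Normalize by the overall spread of X as well as the feature count.
        return 1.0 / (X.shape[1] * X.std())
    elif gamma == 'auto':
        warnings.warn("gamma='auto' (1 / n_features) is deprecated in favour"
                      " of 'scale' (1 / (n_features * X.std())).",
                      DeprecationWarning)
        return 1.0 / X.shape[1]
    # Numeric values pass through unchanged.
    return gamma


X = np.array([[0.0], [1.0]])
print(resolve_gamma('scale', X))  # 2.0: X.std() is 0.5 and n_features is 1

Note that X.std() is the standard deviation over all entries of X, not per feature, which is why a single scalar suffices in the formula.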
45 changes: 40 additions & 5 deletions sklearn/svm/classes.py
@@ -418,7 +418,14 @@ class SVC(BaseSVC):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -572,7 +579,14 @@ class NuSVC(BaseSVC):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -725,7 +739,14 @@ class SVR(BaseLibSVM, RegressorMixin):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -840,7 +861,14 @@ class NuSVR(BaseLibSVM, RegressorMixin):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -949,7 +977,14 @@ class OneClassSVM(BaseLibSVM):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
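What the new option buys: 'auto' ignores the magnitude of the data entirely, while 'scale' shrinks gamma as the values in X spread out, keeping the RBF kernel argument gamma * ||x - x'||^2 in a reasonable range without manual tuning. A small demonstration of the resolved value under this branch (_gamma is the private attribute the new tests below also inspect):

import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
y = (X[:, 0] > 0).astype(int)

for c in (1.0, 100.0):
    clf = SVC(gamma='scale').fit(c * X, y)
    # Roughly 1 / (4 * c): gamma adapts as the data is rescaled, whereas
    # gamma='auto' would stay at 1 / 4 = 0.25 regardless of the scale of X.
    print(c, clf._gamma)

One design subtlety: with X.std() in the denominator, rescaling X by a factor c still scales gamma * ||x - x'||^2 by c (squared distances grow by c**2 while gamma only shrinks by 1/c), so 'scale' normalizes the kernel width rather than cancelling the data's scale exactly; dividing by the variance instead would cancel it.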
6 changes: 3 additions & 3 deletions sklearn/svm/tests/test_sparse.py
@@ -181,7 +181,7 @@ def test_error():
Y2 = Y[:-1] # wrong dimensions for labels
assert_raises(ValueError, clf.fit, X_sp, Y2)

- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X_sp, Y)
assert_array_equal(clf.predict(T), true_result)

@@ -238,7 +238,7 @@ def test_weight():
X_ = sparse.csr_matrix(X_)
for clf in (linear_model.LogisticRegression(),
svm.LinearSVC(random_state=0),
- svm.SVC()):
+ svm.SVC(gamma="scale")):
clf.set_params(class_weight={0: 5})
clf.fit(X_[:180], y_[:180])
y_pred = clf.predict(X_[180:])
@@ -247,7 +247,7 @@

def test_sample_weights():
# Test weights on individual samples
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X_sp, Y)
assert_array_equal(clf.predict([X[2]]), [1.])

34 changes: 26 additions & 8 deletions sklearn/svm/tests/test_svm.py
@@ -405,7 +405,7 @@ def test_weight():
weights=[0.833, 0.167], random_state=2)

for clf in (linear_model.LogisticRegression(),
- svm.LinearSVC(random_state=0), svm.SVC()):
+ svm.LinearSVC(random_state=0), svm.SVC(gamma="scale")):
clf.set_params(class_weight={0: .1, 1: 10})
clf.fit(X_[:100], y_[:100])
y_pred = clf.predict(X_[100:])
@@ -415,7 +415,7 @@
def test_sample_weights():
# Test weights on individual samples
# TODO: check on NuSVR, OneClass, etc.
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X, Y)
assert_array_equal(clf.predict([X[2]]), [1.])

@@ -424,7 +424,7 @@ def test_sample_weights():
assert_array_equal(clf.predict([X[2]]), [2.])

# test that rescaling all samples is the same as changing C
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X, Y)
dual_coef_no_weight = clf.dual_coef_
clf.set_params(C=100)
@@ -472,7 +472,7 @@ def test_bad_input():
assert_raises(ValueError, clf.fit, X, Y2)

# Test with arrays that are non-contiguous.
- for clf in (svm.SVC(), svm.LinearSVC(random_state=0)):
+ for clf in (svm.SVC(gamma="scale"), svm.LinearSVC(random_state=0)):
Xf = np.asfortranarray(X)
assert_false(Xf.flags['C_CONTIGUOUS'])
yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T)
@@ -487,18 +487,18 @@ def test_bad_input():
assert_raises(ValueError, clf.fit, X, Y)

# sample_weight bad dimensions
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
assert_raises(ValueError, clf.fit, X, Y, sample_weight=range(len(X) - 1))

# predict with sparse input when trained with dense
- clf = svm.SVC().fit(X, Y)
+ clf = svm.SVC(gamma="scale").fit(X, Y)
assert_raises(ValueError, clf.predict, sparse.lil_matrix(X))

Xt = np.array(X).T
clf.fit(np.dot(X, Xt), Y)
assert_raises(ValueError, clf.predict, X)

- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X, Y)
assert_raises(ValueError, clf.predict, Xt)

@@ -844,7 +844,7 @@ def test_timeout():
def test_unfitted():
X = "foo!" # input validation not required when SVM not fitted

- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
assert_raises_regexp(Exception, r".*\bSVC\b.*\bnot\b.*\bfitted\b",
clf.predict, X)

@@ -974,3 +974,21 @@ def test_ovr_decision_function():
# Test if the first point has lower decision value on every quadrant
# compared to the second point
assert_true(np.all(pred_class_deci_val[:, 0] < pred_class_deci_val[:, 1]))

def test_gamma_auto():
X, y = [[0.0], [1.0]], [0, 1]

msg = ("The default gamma parameter value 'auto', calculated as 1 / n_features,"
" is depreciated in version 0.19 and will be replaced by 'scale',"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we use "deprecated" not "depreciated"

" calculated as 1 / (n_features * X.std()) in version 0.21.")

assert_warns_message(DeprecationWarning,
msg,
svm.SVC(gamma='auto').fit, X, y)
Review comment (Member):
But this means that a user can't intentionally pass 'auto' without receiving a warning, which isn't great. We could solve this by making the default actually 'auto_deprecated' which behaves like 'auto' with a warning. Using 'auto' explicitly would be same without warning.
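A minimal sketch of the sentinel pattern suggested here, assuming a new default string 'auto_deprecated' (the name and the free-standing helper are illustrative, not code from this PR):

import warnings


def _resolve_gamma(gamma, n_features, x_std):
    # 'auto_deprecated' would be the implicit default: same numeric result
    # as 'auto', but with a warning. Passing 'auto' explicitly stays silent.
    if gamma == 'auto_deprecated':
        warnings.warn("The default value of gamma will change from 'auto'"
                      " to 'scale'. Set gamma explicitly to silence this"
                      " warning.", DeprecationWarning)
        gamma = 'auto'
    if gamma == 'scale':
        return 1.0 / (n_features * x_std)
    if gamma == 'auto':
        return 1.0 / n_features
    return gamma

This is roughly the route scikit-learn later took: 'auto_deprecated' became the implicit default in 0.20, with 'scale' taking over as the actual default in 0.22.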


def test_gamma_scale():
X, y = [[0.0], [1.0]], [0, 1]

clf = svm.SVC(gamma='scale').fit(X, y)
assert_equal(clf._gamma, 2.0)
Review comment (Member):
please check for more than one X

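A sketch of the broader check being asked for, exercising more than one X; the expected values follow directly from 1 / (n_features * X.std()), and the assert helpers are assumed to be imported as elsewhere in this module (not part of the PR as submitted):

def test_gamma_scale_more_data():
    # 1 feature, overall std 0.5 -> gamma = 1 / (1 * 0.5) = 2.0
    X, y = [[0.0], [1.0]], [0, 1]
    assert_almost_equal(svm.SVC(gamma='scale').fit(X, y)._gamma, 2.0)

    # 2 features, std over all four entries [0, 0, 1, 1] is still 0.5,
    # so gamma = 1 / (2 * 0.5) = 1.0
    X, y = [[0.0, 0.0], [1.0, 1.0]], [0, 1]
    assert_almost_equal(svm.SVC(gamma='scale').fit(X, y)._gamma, 1.0)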

Review comment (Member):
there should be a newline at the end of the file.
