[WIP] gamma=auto in SVC #8361 by neokt · Pull Request #8535 · scikit-learn/scikit-learn · GitHub

[WIP] gamma=auto in SVC #8361 #8535


Closed · wants to merge 1 commit into from
6 changes: 3 additions & 3 deletions sklearn/ensemble/tests/test_bagging.py
@@ -68,7 +68,7 @@ def test_classification():
Perceptron(),
DecisionTreeClassifier(),
KNeighborsClassifier(),
- SVC()]:
+ SVC(gamma="scale")]:
for params in grid:
BaggingClassifier(base_estimator=base_estimator,
random_state=rng,
@@ -309,7 +309,7 @@ def test_oob_score_classification():
iris.target,
random_state=rng)

- for base_estimator in [DecisionTreeClassifier(), SVC()]:
+ for base_estimator in [DecisionTreeClassifier(), SVC(gamma="scale")]:
Review comment (Member):
I think we can use this PR to also set a random_state, to reduce diff of #8563.

It will anyway have a merge conflict for all these lines in that PR...

@jnothman Ok with you?

clf = BaggingClassifier(base_estimator=base_estimator,
n_estimators=100,
bootstrap=True,
@@ -493,7 +493,7 @@ def test_gridsearch():
parameters = {'n_estimators': (1, 2),
'base_estimator__C': (1, 2)}

- GridSearchCV(BaggingClassifier(SVC()),
+ GridSearchCV(BaggingClassifier(SVC(gamma="scale")),
parameters,
scoring="roc_auc").fit(X, y)

4 changes: 2 additions & 2 deletions sklearn/ensemble/tests/test_weight_boosting.py
@@ -287,7 +287,7 @@ def test_base_estimator():
clf = AdaBoostClassifier(RandomForestClassifier())
clf.fit(X, y_regr)

- clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
+ clf = AdaBoostClassifier(SVC(gamma="scale"), algorithm="SAMME")
clf.fit(X, y_class)

from sklearn.ensemble import RandomForestRegressor
@@ -302,7 +302,7 @@ def test_base_estimator():
# Check that an empty discrete ensemble fails in fit, not predict.
X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]]
y_fail = ["foo", "bar", 1, 2]
- clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
+ clf = AdaBoostClassifier(SVC(gamma="scale"), algorithm="SAMME")
assert_raises_regexp(ValueError, "worse than random",
clf.fit, X_fail, y_fail)

2 changes: 1 addition & 1 deletion sklearn/grid_search.py
@@ -743,7 +743,7 @@ class GridSearchCV(BaseSearchCV):
>>> from sklearn import svm, grid_search, datasets
>>> iris = datasets.load_iris()
>>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
- >>> svr = svm.SVC()
+ >>> svr = svm.SVC(gamma="scale")
>>> clf = grid_search.GridSearchCV(svr, parameters)
>>> clf.fit(iris.data, iris.target)
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
4 changes: 2 additions & 2 deletions sklearn/metrics/tests/test_score_objects.py
@@ -180,11 +180,11 @@ def test_check_scoring_gridsearchcv():
# test that check_scoring works on GridSearchCV and pipeline.
# slightly redundant non-regression test.

- grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
+ grid = GridSearchCV(LinearSVC(gamma="scale"), param_grid={'C': [.1, 1]})
scorer = check_scoring(grid, "f1")
assert_true(isinstance(scorer, _PredictScorer))

- pipe = make_pipeline(LinearSVC())
+ pipe = make_pipeline(LinearSVC(gamma="scale"))
scorer = check_scoring(pipe, "f1")
assert_true(isinstance(scorer, _PredictScorer))

2 changes: 1 addition & 1 deletion sklearn/model_selection/_search.py
@@ -814,7 +814,7 @@ class GridSearchCV(BaseSearchCV):
>>> from sklearn.model_selection import GridSearchCV
>>> iris = datasets.load_iris()
>>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
- >>> svr = svm.SVC()
+ >>> svr = svm.SVC(gamma="scale")
>>> clf = GridSearchCV(svr, parameters)
>>> clf.fit(iris.data, iris.target)
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
30 changes: 15 additions & 15 deletions sklearn/model_selection/tests/test_search.py
@@ -399,7 +399,7 @@ def test_grid_search_one_grid_point():
X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
param_dict = {"C": [1.0], "kernel": ["rbf"], "gamma": [0.1]}

- clf = SVC()
+ clf = SVC(gamma="scale")
cv = GridSearchCV(clf, param_dict)
cv.fit(X_, y_)

@@ -423,30 +423,30 @@ def test_grid_search_when_param_grid_includes_range():

def test_grid_search_bad_param_grid():
param_dict = {"C": 1.0}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raise_message(
ValueError,
"Parameter values for parameter (C) need to be a sequence"
"(but not a string) or np.ndarray.",
GridSearchCV, clf, param_dict)

param_dict = {"C": []}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raise_message(
ValueError,
"Parameter values for parameter (C) need to be a non-empty sequence.",
GridSearchCV, clf, param_dict)

param_dict = {"C": "1,2,3"}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raise_message(
ValueError,
"Parameter values for parameter (C) need to be a sequence"
"(but not a string) or np.ndarray.",
GridSearchCV, clf, param_dict)

param_dict = {"C": np.ones(6).reshape(3, 2)}
- clf = SVC()
+ clf = SVC(gamma="scale")
assert_raises(ValueError, GridSearchCV, clf, param_dict)


@@ -742,10 +742,10 @@ def test_grid_search_cv_results():
n_grid_points = 6
params = [dict(kernel=['rbf', ], C=[1, 10], gamma=[0.1, 1]),
dict(kernel=['poly', ], degree=[1, 2])]
- grid_search = GridSearchCV(SVC(), cv=n_splits, iid=False,
+ grid_search = GridSearchCV(SVC(gamma="scale"), cv=n_splits, iid=False,
param_grid=params)
grid_search.fit(X, y)
- grid_search_iid = GridSearchCV(SVC(), cv=n_splits, iid=True,
+ grid_search_iid = GridSearchCV(SVC(gamma="scale"), cv=n_splits, iid=True,
param_grid=params)
grid_search_iid.fit(X, y)

@@ -802,11 +802,11 @@ def test_random_search_cv_results():
n_splits = 3
n_search_iter = 30
params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
- random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter,
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=n_search_iter,
cv=n_splits, iid=False,
param_distributions=params)
random_search.fit(X, y)
- random_search_iid = RandomizedSearchCV(SVC(), n_iter=n_search_iter,
+ random_search_iid = RandomizedSearchCV(SVC(gamma="scale"), n_iter=n_search_iter,
cv=n_splits, iid=True,
param_distributions=params)
random_search_iid.fit(X, y)
@@ -850,8 +850,8 @@ def test_search_iid_param():
# create "cv" for splits
cv = [[mask, ~mask], [~mask, mask]]
# once with iid=True (default)
- grid_search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=cv)
- random_search = RandomizedSearchCV(SVC(), n_iter=2,
+ grid_search = GridSearchCV(SVC(gamma="scale"), param_grid={'C': [1, 10]}, cv=cv)
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=2,
param_distributions={'C': [1, 10]},
cv=cv)
for search in (grid_search, random_search):
@@ -893,10 +893,10 @@ def test_search_iid_param():
assert_almost_equal(train_std, 0)

# once with iid=False
- grid_search = GridSearchCV(SVC(),
+ grid_search = GridSearchCV(SVC(gamma="scale"),
param_grid={'C': [1, 10]},
cv=cv, iid=False)
- random_search = RandomizedSearchCV(SVC(), n_iter=2,
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=2,
param_distributions={'C': [1, 10]},
cv=cv, iid=False)

@@ -936,8 +936,8 @@ def test_search_cv_results_rank_tie_breaking():
# which would result in a tie of their mean cv-scores
param_grid = {'C': [1, 1.001, 0.001]}

- grid_search = GridSearchCV(SVC(), param_grid=param_grid)
- random_search = RandomizedSearchCV(SVC(), n_iter=3,
+ grid_search = GridSearchCV(SVC(gamma="scale"), param_grid=param_grid)
+ random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=3,
param_distributions=param_grid)

for search in (grid_search, random_search):
11 changes: 8 additions & 3 deletions sklearn/svm/base.py
@@ -168,9 +168,14 @@ def fit(self, X, y, sample_weight=None):
"%r vs %r\n"
"Note: Sparse matrices cannot be indexed w/"
"boolean masks (use `indices=True` in CV)."
% (sample_weight.shape, X.shape))

if self.gamma == 'auto':
% (sample_weight.shape, X.shape))

if self.gamma == 'scale':
self._gamma = 1.0 / (X.shape[1] * X.std())
elif self.gamma == 'auto':
warnings.warn("The default gamma parameter value 'auto', calculated as 1 / n_features,"
" is depreciated in version 0.19 and will be replaced by 'scale',"
" calculated as 1 / (n_features * X.std()) in version 0.21.", DeprecationWarning)
self._gamma = 1.0 / X.shape[1]
else:
self._gamma = self.gamma
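In effect, the patch resolves the public gamma parameter into the private _gamma attribute at fit time. A minimal standalone sketch of that resolution logic, assuming a dense 2-D numpy array X (the helper name resolve_gamma is ours, for illustration only):

import warnings

import numpy as np


def resolve_gamma(gamma, X):
    # Mirrors the branch added to BaseLibSVM.fit above (a sketch, not PR code).
    if gamma == 'scale':
        # Normalize by the overall spread of X as well as the feature count.
        return 1.0 / (X.shape[1] * X.std())
    elif gamma == 'auto':
        warnings.warn("gamma='auto' (1 / n_features) is deprecated in favour"
                      " of 'scale' (1 / (n_features * X.std())).",
                      DeprecationWarning)
        return 1.0 / X.shape[1]
    # Numeric values pass through unchanged.
    return gamma


X = np.array([[0.0], [1.0]])
print(resolve_gamma('scale', X))  # 2.0: X.std() is 0.5 and n_features is 1

Note that X.std() is the standard deviation over all entries of X, not per feature, which is why a single scalar suffices in the formula.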
45 changes: 40 additions & 5 deletions sklearn/svm/classes.py
@@ -418,7 +418,14 @@ class SVC(BaseSVC):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -572,7 +579,14 @@ class NuSVC(BaseSVC):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -725,7 +739,14 @@ class SVR(BaseLibSVM, RegressorMixin):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -840,7 +861,14 @@ class NuSVR(BaseLibSVM, RegressorMixin):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
@@ -949,7 +977,14 @@ class OneClassSVM(BaseLibSVM):

gamma : float, optional (default='auto')
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- If gamma is 'auto' then 1/n_features will be used instead.
+ If gamma is 'auto' then 1/n_features will be used.
+ If gamma is 'scale' then 1/(n_features * X.std()) will be used.
+ The current default 'auto' is deprecated in version 0.19 and will
+ be replaced by 'scale' in version 0.21.
+
+ .. versionchanged:: 0.19
+     Default parameter value 'auto' is deprecated and will be replaced
+     by 'scale' in version 0.21.

coef0 : float, optional (default=0.0)
Independent term in kernel function.
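What the new option buys: 'auto' ignores the magnitude of the data entirely, while 'scale' shrinks gamma as the values in X spread out, keeping the RBF kernel argument gamma * ||x - x'||^2 in a reasonable range without manual tuning. A small demonstration of the resolved value under this branch (_gamma is the private attribute the new tests below also inspect):

import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
y = (X[:, 0] > 0).astype(int)

for c in (1.0, 100.0):
    clf = SVC(gamma='scale').fit(c * X, y)
    # Roughly 1 / (4 * c): gamma adapts as the data is rescaled, whereas
    # gamma='auto' would stay at 1 / 4 = 0.25 regardless of the scale of X.
    print(c, clf._gamma)

One design subtlety: with X.std() in the denominator, rescaling X by a factor c still scales gamma * ||x - x'||^2 by c (squared distances grow by c**2 while gamma only shrinks by 1/c), so 'scale' normalizes the kernel width rather than cancelling the data's scale exactly; dividing by the variance instead would cancel it.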
6 changes: 3 additions & 3 deletions sklearn/svm/tests/test_sparse.py
@@ -181,7 +181,7 @@ def test_error():
Y2 = Y[:-1] # wrong dimensions for labels
assert_raises(ValueError, clf.fit, X_sp, Y2)

- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X_sp, Y)
assert_array_equal(clf.predict(T), true_result)

@@ -238,7 +238,7 @@ def test_weight():
X_ = sparse.csr_matrix(X_)
for clf in (linear_model.LogisticRegression(),
svm.LinearSVC(random_state=0),
- svm.SVC()):
+ svm.SVC(gamma="scale")):
clf.set_params(class_weight={0: 5})
clf.fit(X_[:180], y_[:180])
y_pred = clf.predict(X_[180:])
@@ -247,7 +247,7 @@

def test_sample_weights():
# Test weights on individual samples
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X_sp, Y)
assert_array_equal(clf.predict([X[2]]), [1.])

34 changes: 26 additions & 8 deletions sklearn/svm/tests/test_svm.py
@@ -405,7 +405,7 @@ def test_weight():
weights=[0.833, 0.167], random_state=2)

for clf in (linear_model.LogisticRegression(),
- svm.LinearSVC(random_state=0), svm.SVC()):
+ svm.LinearSVC(random_state=0), svm.SVC(gamma="scale")):
clf.set_params(class_weight={0: .1, 1: 10})
clf.fit(X_[:100], y_[:100])
y_pred = clf.predict(X_[100:])
@@ -415,7 +415,7 @@
def test_sample_weights():
# Test weights on individual samples
# TODO: check on NuSVR, OneClass, etc.
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X, Y)
assert_array_equal(clf.predict([X[2]]), [1.])

@@ -424,7 +424,7 @@ def test_sample_weights():
assert_array_equal(clf.predict([X[2]]), [2.])

# test that rescaling all samples is the same as changing C
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X, Y)
dual_coef_no_weight = clf.dual_coef_
clf.set_params(C=100)
@@ -472,7 +472,7 @@ def test_bad_input():
assert_raises(ValueError, clf.fit, X, Y2)

# Test with arrays that are non-contiguous.
- for clf in (svm.SVC(), svm.LinearSVC(random_state=0)):
+ for clf in (svm.SVC(gamma="scale"), svm.LinearSVC(random_state=0)):
Xf = np.asfortranarray(X)
assert_false(Xf.flags['C_CONTIGUOUS'])
yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T)
@@ -487,18 +487,18 @@ def test_bad_input():
assert_raises(ValueError, clf.fit, X, Y)

# sample_weight bad dimensions
- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
assert_raises(ValueError, clf.fit, X, Y, sample_weight=range(len(X) - 1))

# predict with sparse input when trained with dense
- clf = svm.SVC().fit(X, Y)
+ clf = svm.SVC(gamma="scale").fit(X, Y)
assert_raises(ValueError, clf.predict, sparse.lil_matrix(X))

Xt = np.array(X).T
clf.fit(np.dot(X, Xt), Y)
assert_raises(ValueError, clf.predict, X)

- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
clf.fit(X, Y)
assert_raises(ValueError, clf.predict, Xt)

@@ -844,7 +844,7 @@ def test_timeout():
def test_unfitted():
X = "foo!" # input validation not required when SVM not fitted

- clf = svm.SVC()
+ clf = svm.SVC(gamma="scale")
assert_raises_regexp(Exception, r".*\bSVC\b.*\bnot\b.*\bfitted\b",
clf.predict, X)

@@ -974,3 +974,21 @@ def test_ovr_decision_function():
# Test if the first point has lower decision value on every quadrant
# compared to the second point
assert_true(np.all(pred_class_deci_val[:, 0] < pred_class_deci_val[:, 1]))

def test_gamma_auto():
X, y = [[0.0], [1.0]], [0, 1]

msg = ("The default gamma parameter value 'auto', calculated as 1 / n_features,"
" is depreciated in version 0.19 and will be replaced by 'scale',"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we use "deprecated" not "depreciated"

" calculated as 1 / (n_features * X.std()) in version 0.21.")

assert_warns_message(DeprecationWarning,
msg,
svm.SVC(gamma='auto').fit, X, y)
Review comment (Member):
But this means that a user can't intentionally pass 'auto' without receiving a warning, which isn't great. We could solve this by making the default actually 'auto_deprecated' which behaves like 'auto' with a warning. Using 'auto' explicitly would be same without warning.
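A minimal sketch of the sentinel pattern suggested here, assuming a new default string 'auto_deprecated' (the name and the free-standing helper are illustrative, not code from this PR):

import warnings


def _resolve_gamma(gamma, n_features, x_std):
    # 'auto_deprecated' would be the implicit default: same numeric result
    # as 'auto', but with a warning. Passing 'auto' explicitly stays silent.
    if gamma == 'auto_deprecated':
        warnings.warn("The default value of gamma will change from 'auto'"
                      " to 'scale'. Set gamma explicitly to silence this"
                      " warning.", DeprecationWarning)
        gamma = 'auto'
    if gamma == 'scale':
        return 1.0 / (n_features * x_std)
    if gamma == 'auto':
        return 1.0 / n_features
    return gamma

This is roughly the route scikit-learn later took: 'auto_deprecated' became the implicit default in 0.20, with 'scale' taking over as the actual default in 0.22.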


def test_gamma_scale():
X, y = [[0.0], [1.0]], [0, 1]

clf = svm.SVC(gamma='scale').fit(X, y)
assert_equal(clf._gamma, 2.0)
Review comment (Member):
please check for more than one X

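A sketch of the broader check being asked for, exercising more than one X; the expected values follow directly from 1 / (n_features * X.std()), and the assert helpers are assumed to be imported as elsewhere in this module (not part of the PR as submitted):

def test_gamma_scale_more_data():
    # 1 feature, overall std 0.5 -> gamma = 1 / (1 * 0.5) = 2.0
    X, y = [[0.0], [1.0]], [0, 1]
    assert_almost_equal(svm.SVC(gamma='scale').fit(X, y)._gamma, 2.0)

    # 2 features, std over all four entries [0, 0, 1, 1] is still 0.5,
    # so gamma = 1 / (2 * 0.5) = 1.0
    X, y = [[0.0, 0.0], [1.0, 1.0]], [0, 1]
    assert_almost_equal(svm.SVC(gamma='scale').fit(X, y)._gamma, 1.0)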

Review comment (Member):
there should be a newline at the end of the file.
