ENH change defaults on SGD (works on digits and iris and I just guessed them) · pfdevilliers/scikit-learn@cd0b531
Commit cd0b531

ENH change defaults on SGD (works on digits and iris and I just guessed them).
Turned up alpha and n_iter. This corresponds to more regularization and more careful SGD. On which kinds of problems do the old defaults work?
1 parent c02938f · commit cd0b531
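The commit message says the new defaults were sanity-checked on digits and iris. A minimal sketch of that comparison is below; it is an illustration, not the author's actual test script, and it uses the parameter names of this era's API (`n_iter` and `seed`, which later releases renamed to `max_iter` and `random_state`):

```python
# Hypothetical before/after comparison on the two datasets named in the
# commit message. Parameter names (n_iter, seed) match the API at the
# time of this commit.
from sklearn import datasets
from sklearn.linear_model import SGDClassifier

for name, bunch in [("iris", datasets.load_iris()),
                    ("digits", datasets.load_digits())]:
    X, y = bunch.data, bunch.target
    old = SGDClassifier(alpha=0.0001, n_iter=5, seed=0).fit(X, y)  # old defaults
    new = SGDClassifier(alpha=0.01, n_iter=20, seed=0).fit(X, y)   # new defaults
    print(name, old.score(X, y), new.score(X, y))
```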

File tree: 1 file changed, +8 −8 lines

sklearn/linear_model/stochastic_gradient.py

Lines changed: 8 additions & 8 deletions
@@ -81,7 +81,7 @@ class SGDClassifier(BaseSGD, ClassifierMixin, SelectorMixin):
         not achievable with 'l2'.
 
     alpha : float
-        Constant that multiplies the regularization term. Defaults to 0.0001
+        Constant that multiplies the regularization term. Defaults to 0.01
 
     rho : float
         The Elastic Net mixing parameter, with 0 < rho <= 1.
@@ -93,7 +93,7 @@ class SGDClassifier(BaseSGD, ClassifierMixin, SelectorMixin):
 
     n_iter: int, optional
         The number of passes over the training data (aka epochs).
-        Defaults to 5.
+        Defaults to 20.
 
     shuffle: bool, optional
         Whether or not the training data should be shuffled after each epoch.
@@ -154,9 +154,9 @@ class SGDClassifier(BaseSGD, ClassifierMixin, SelectorMixin):
     >>> clf = linear_model.SGDClassifier()
     >>> clf.fit(X, Y)
     ... #doctest: +NORMALIZE_WHITESPACE
-    SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
+    SGDClassifier(alpha=0.01, class_weight=None, epsilon=0.1, eta0=0.0,
             fit_intercept=True, learning_rate='optimal', loss='hinge',
-            n_iter=5, n_jobs=1, penalty='l2', power_t=0.5, rho=0.85, seed=0,
+            n_iter=20, n_jobs=1, penalty='l2', power_t=0.5, rho=0.85, seed=0,
             shuffle=False, verbose=0, warm_start=False)
     >>> print(clf.predict([[-0.8, -1]]))
     [1]
@@ -166,8 +166,8 @@ class SGDClassifier(BaseSGD, ClassifierMixin, SelectorMixin):
     LinearSVC, LogisticRegression, Perceptron
 
     """
-    def __init__(self, loss="hinge", penalty='l2', alpha=0.0001,
-                 rho=0.85, fit_intercept=True, n_iter=5, shuffle=False,
+    def __init__(self, loss="hinge", penalty='l2', alpha=0.01,
+                 rho=0.85, fit_intercept=True, n_iter=20, shuffle=False,
                  verbose=0, epsilon=0.1, n_jobs=1, seed=0,
                  learning_rate="optimal", eta0=0.0, power_t=0.5,
                  class_weight=None, warm_start=False):
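For intuition on why a larger `alpha` means more regularization: in each plain SGD step for an L2-penalized linear model, the penalty contributes a weight-shrinkage term proportional to `alpha`, so raising the default from 0.0001 to 0.01 shrinks the weights two orders of magnitude harder per update. A toy sketch of one hinge-loss epoch (pedagogical only; the real implementation is an optimized Cython loop, not this):

```python
import numpy as np

# One epoch of SGD for the hinge loss with an L2 penalty, assuming
# labels yi in {-1, +1}. Pedagogical sketch only, not the actual
# implementation behind SGDClassifier.
def sgd_epoch(w, X, y, alpha, eta=0.01):
    for xi, yi in zip(X, y):
        margin = yi * np.dot(w, xi)
        loss_grad = -yi * xi if margin < 1.0 else 0.0  # hinge subgradient
        w = w - eta * (loss_grad + alpha * w)          # alpha scales the L2 shrinkage
    return w
```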
@@ -610,7 +610,7 @@ class SGDRegressor(BaseSGD, RegressorMixin, SelectorMixin):
     >>> X = np.random.randn(n_samples, n_features)
     >>> clf = linear_model.SGDRegressor()
     >>> clf.fit(X, y)
-    SGDRegressor(alpha=0.0001, epsilon=0.1, eta0=0.01, fit_intercept=True,
+    SGDRegressor(alpha=0.01, epsilon=0.1, eta0=0.01, fit_intercept=True,
             learning_rate='invscaling', loss='squared_loss', n_iter=5, p=None,
             penalty='l2', power_t=0.25, rho=0.85, seed=0, shuffle=False,
             verbose=0, warm_start=False)
@@ -620,7 +620,7 @@ class SGDRegressor(BaseSGD, RegressorMixin, SelectorMixin):
     Ridge, ElasticNet, Lasso, SVR
 
     """
-    def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
+    def __init__(self, loss="squared_loss", penalty="l2", alpha=0.01,
                  rho=0.85, fit_intercept=True, n_iter=5, shuffle=False, verbose=0,
                  epsilon=0.1, p=None, seed=0, learning_rate="invscaling", eta0=0.01,
                  power_t=0.25, warm_start=False):
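Note that for SGDRegressor only `alpha` changes; `n_iter` keeps its old default of 5, as the unchanged `n_iter=5` in the hunk above shows. A hedged usage sketch with the new default, on synthetic data and with this era's API:

```python
import numpy as np
from sklearn.linear_model import SGDRegressor

# Synthetic regression problem; after this commit, SGDRegressor()
# defaults to alpha=0.01 while keeping n_iter=5.
rng = np.random.RandomState(0)
X = rng.randn(100, 10)
y = np.dot(X, rng.randn(10)) + 0.1 * rng.randn(100)

reg = SGDRegressor().fit(X, y)
print(reg.score(X, y))  # R^2 on the training data
```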
