alpha clarification · scikit-learn/scikit-learn@66e4a92 · GitHub

Commit 66e4a92

alpha clarification
1 parent: 4a3d751 · commit: 66e4a92

File tree

3 files changed: +18 −16 lines

sklearn/linear_model/logistic.py

Lines changed: 0 additions & 1 deletion
@@ -698,7 +698,6 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                 X, target, sample_weight, 'log', 1. / C, max_iter, tol,
                 verbose, random_state, False, max_squared_sum,
                 warm_start_sag)
-            w0 = warm_start_sag['coef']
         else:
             raise ValueError("solver must be one of {'liblinear', 'lbfgs', "
                              "'newton-cg', 'sag'}, got '%s' instead" % solver)

sklearn/linear_model/sag.py

Lines changed: 17 additions & 14 deletions
@@ -14,19 +14,20 @@
 from .sag_fast import sag, get_max_squared_sum


-def get_auto_step_size(max_squared_sum, alpha, loss, fit_intercept):
+def get_auto_step_size(max_squared_sum, alpha_scaled, loss, fit_intercept):
     """Compute automatic step size for SAG solver

-    The step size is set to 1 / (alpha + L + fit_intercept) where L is
+    The step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is
     the max sum of squares for over all samples.

     Parameters
     ----------
     max_squared_sum : float
         Maximum squared sum of X over samples.

-    alpha : float
-        Constant that multiplies the regularization term. Defaults to 0.0001
+    alpha_scaled : float
+        Constant that multiplies the regularization term, scaled by
+        1. / n_samples, the number of samples.

     loss : string, in {"log", "squared"}
         The loss function used in SAG solver.
@@ -43,16 +44,17 @@ def get_auto_step_size(max_squared_sum, alpha, loss, fit_intercept):
     """
     if loss == 'log':
         # inverse Lipschitz constant for log loss
-        return 4.0 / (max_squared_sum + int(fit_intercept) + 4.0 * alpha)
+        return 4.0 / (max_squared_sum + int(fit_intercept)
+                      + 4.0 * alpha_scaled)
     elif loss == 'squared':
         # inverse Lipschitz constant for squared loss
-        return 1.0 / (max_squared_sum + int(fit_intercept) + alpha)
+        return 1.0 / (max_squared_sum + int(fit_intercept) + alpha_scaled)
     else:
         raise ValueError("Unknown loss function for SAG solver, got %s "
                          "instead of 'log' or 'squared'" % loss)


-def sag_solver(X, y, sample_weight=None, loss='log', alpha=1e-4,
+def sag_solver(X, y, sample_weight=None, loss='log', alpha=1.,
                max_iter=1000, tol=0.001, verbose=0, random_state=None,
                check_input=True, max_squared_sum=None,
                warm_start_mem=dict()):
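
To sanity-check the renamed quantity outside the library, here is a minimal standalone sketch of the step-size rule this hunk encodes (auto_step_size and the toy data are illustrative stand-ins, not scikit-learn API):

import numpy as np

def auto_step_size(max_squared_sum, alpha_scaled, loss, fit_intercept):
    # Inverse Lipschitz constants, mirroring the diff above; alpha_scaled
    # is the user-facing alpha already divided by n_samples.
    if loss == 'log':
        return 4.0 / (max_squared_sum + int(fit_intercept)
                      + 4.0 * alpha_scaled)
    elif loss == 'squared':
        return 1.0 / (max_squared_sum + int(fit_intercept) + alpha_scaled)
    raise ValueError("loss must be 'log' or 'squared', got %r" % loss)

# Toy check: 100 samples, alpha=1. (the new default), log loss.
rng = np.random.RandomState(0)
X = rng.rand(100, 5)
max_squared_sum = np.max(np.sum(X ** 2, axis=1))
print(auto_step_size(max_squared_sum, 1.0 / 100, 'log', fit_intercept=True))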
@@ -91,7 +93,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1e-4,
         'squared' is used for regression, like in Ridge.

     alpha : float, optional
-        Constant that multiplies the regularization term. Defaults to 0.0001
+        Constant that multiplies the regularization term. Defaults to 1.

     max_iter: int, optional
         The max number of passes over the training data if the stopping
@@ -177,7 +179,8 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1e-4,
         y = check_array(y, dtype=np.float64, ensure_2d=False, order='C')

     n_samples, n_features = X.shape[0], X.shape[1]
-    alpha = float(alpha) / n_samples
+    # As in SGD, the alpha is scaled by n_samples.
+    alpha_scaled = float(alpha) / n_samples

     # initialization
     if sample_weight is None:
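
To make the renaming concrete, a small illustrative snippet (hypothetical values, not library code) relating the user-facing alpha to the internal alpha_scaled, and to the C that logistic_regression_path forwards as 1. / C in the logistic.py hunk above:

n_samples = 200
alpha = 1.0                               # sag_solver's new default
alpha_scaled = float(alpha) / n_samples   # value used inside the solver

# LogisticRegression(solver='sag', C=C) reaches sag_solver with alpha = 1. / C,
# so the effective internal value is 1. / (C * n_samples).
C = 1.0
assert alpha_scaled == (1.0 / C) / n_samples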
@@ -226,19 +229,19 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1e-4,

     if max_squared_sum is None:
         max_squared_sum = get_max_squared_sum(X)
-    step_size = get_auto_step_size(max_squared_sum, alpha, loss,
+    step_size = get_auto_step_size(max_squared_sum, alpha_scaled, loss,
                                    fit_intercept)

-    if step_size * alpha == 1:
+    if step_size * alpha_scaled == 1:
         raise ZeroDivisionError("Current sag implementation does not handle "
-                                "the case step_size * alpha == 1")
+                                "the case step_size * alpha_scaled == 1")

     if loss == 'log':
         class_loss = Log()
     elif loss == 'squared':
         class_loss = SquaredLoss()
     else:
-        raise ValueError("Invalid sparseness parameter: got %r instead of "
+        raise ValueError("Invalid loss parameter: got %r instead of "
                          "one of ('log', 'squared')" % loss)

     intercept_, num_seen, n_iter_, intercept_sum_gradient = \
@@ -247,7 +250,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1e-4,
             n_features, tol,
             max_iter,
             class_loss,
-            step_size, alpha,
+            step_size, alpha_scaled,
             sum_gradient_init.ravel(),
             gradient_memory_init.ravel(),
             seen_init.ravel(),
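
A quick arithmetic check of the guard: with fit_intercept=False and log loss, step_size = 4 / (max_squared_sum + 4 * alpha_scaled), so step_size * alpha_scaled == 1 exactly when max_squared_sum == 0, i.e. when X is all zeros. A plain-Python sketch of that degenerate case:

max_squared_sum = 0.0      # X contains only zeros
fit_intercept = False
alpha_scaled = 0.5         # any positive value gives the same product

step_size = 4.0 / (max_squared_sum + int(fit_intercept) + 4.0 * alpha_scaled)
assert step_size * alpha_scaled == 1.0   # the exact case the solver rejects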

sklearn/linear_model/tests/test_sag.py

Lines changed: 1 addition & 1 deletion
@@ -714,7 +714,7 @@ def test_step_size_alpha_error():
     fit_intercept = False
     alpha = 1.
     msg = ("Current sag implementation does not handle the case"
-           " step_size * alpha == 1")
+           " step_size * alpha_scaled == 1")

     clf1 = LogisticRegression(solver='sag', C=1. / alpha,
                               fit_intercept=fit_intercept)
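
The updated test drives that degenerate case through the public estimator; a sketch of an end-to-end reproduction, assuming a scikit-learn build containing this commit (the all-zeros X is chosen precisely so that max_squared_sum == 0):

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.zeros((2, 2))        # degenerate design matrix
y = np.array([1, -1])

clf = LogisticRegression(solver='sag', C=1., fit_intercept=False)
try:
    clf.fit(X, y)
except ZeroDivisionError as e:
    print(e)   # "Current sag implementation does not handle the case ..."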

0 commit comments
