cleaning · scikit-learn/scikit-learn@85f0ecc · GitHub
Commit 85f0ecc

cleaning
1 parent 1e21983 commit 85f0ecc

File tree

2 files changed: 32 additions, 48 deletions

doc/modules/linear_model.rst

Lines changed: 0 additions & 3 deletions
@@ -791,9 +791,6 @@ Robust to unscaled datasets yes yes yes no no
 The "saga" solver is often the best choice. The "liblinear" solver is
 used by default for historical reasons.
 
-The default solver will change to "auto" in version 0.22. This option
-automatically selects a solver based on the `penalty` parameter.
-
 For large dataset, you may also consider using :class:`SGDClassifier`
 with 'log' loss.
 
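The removed paragraph tracked an earlier plan to default to an 'auto' solver; this commit retargets the deprecation at 'lbfgs' instead. As a hedged illustration of the resulting behaviour (assuming a scikit-learn build at this commit, i.e. the 0.20 deprecation cycle), relying on the default solver raises a FutureWarning, while naming one keeps things silent:

import warnings

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=100, random_state=0)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    LogisticRegression().fit(X, y)  # default solver -> FutureWarning
print([str(w.message) for w in caught if w.category is FutureWarning])

LogisticRegression(solver='lbfgs').fit(X, y)  # explicit solver, no warning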
sklearn/linear_model/logistic.py

Lines changed: 32 additions & 45 deletions
@@ -424,27 +424,22 @@ def hessp(v):
     return grad, hessp
 
 
-def _check_solver_option(solver, multi_class, penalty, dual,
-                         previous_default_solver='liblinear'):
+def _check_solver_option(solver, multi_class, penalty, dual):
 
-    # default values raises a future warning
+    # Default values raises a future warning
     if solver == 'warn':
-        # previous_default_solver is used since LogisticRegression and
-        # LogisticRegressionCV don't have the same default in 0.19.
-        solver = previous_default_solver
-
-        # Do not warn if the 'auto' solver selects the previous default solver
-        if previous_default_solver != 'lbfgs':
-            warnings.warn("Default solver will be changed to 'lbfgs' in 0.22. "
-                          "Use a specific solver to silence this warning.",
-                          FutureWarning)
+        solver = 'liblinear'
+        warnings.warn("Default solver will be changed to 'lbfgs' in 0.22. "
+                      "Use a specific solver to silence this warning.",
+                      FutureWarning)
 
     if multi_class == 'warn':
         multi_class = 'ovr'
         warnings.warn("Default multi_class will be changed to 'multinomial' in"
                       " 0.22. Use a specific option to silence this warning.",
                       FutureWarning)
 
+    # Check the string parameters
    if multi_class not in ['multinomial', 'ovr']:
        raise ValueError("multi_class should be either multinomial or "
                         "ovr, got %s." % multi_class)
@@ -477,7 +472,7 @@ def _check_solver_option(solver, multi_class, penalty, dual,
 
 def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                              max_iter=100, tol=1e-4, verbose=0,
-                             solver='warn', coef=None,
+                             solver='lbfgs', coef=None,
                              class_weight=None, dual=False, penalty='l2',
                              intercept_scaling=1., multi_class='warn',
                              random_state=None, check_input=True,
@@ -527,7 +522,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         For the liblinear and lbfgs solvers set verbose to any positive
         number for verbosity.
 
-    solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga', 'auto'}
+    solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}
         Numerical solver to use.
 
     coef : array-like, shape (n_features,), default None
@@ -618,16 +613,16 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     if isinstance(Cs, numbers.Integral):
         Cs = np.logspace(-4, 4, Cs)
 
+    solver, multi_class = _check_solver_option(
+        solver, multi_class, penalty, dual)
+
     # Preprocessing.
     if check_input:
         X = check_array(X, accept_sparse='csr', dtype=np.float64,
                         accept_large_sparse=solver != 'liblinear')
         y = check_array(y, ensure_2d=False, dtype=None)
         check_consistent_length(X, y)
-
-    n_samples, n_features = X.shape
-    solver, multi_class = _check_solver_option(
-        solver, multi_class, penalty, dual, 'lbfgs')
+    _, n_features = X.shape
 
     classes = np.unique(y)
     random_state = check_random_state(random_state)
@@ -805,7 +800,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
 def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
                           scoring=None, fit_intercept=False,
                           max_iter=100, tol=1e-4, class_weight=None,
-                          verbose=0, solver='warn', penalty='l2',
+                          verbose=0, solver='lbfgs', penalty='l2',
                           dual=False, intercept_scaling=1.,
                           multi_class='warn', random_state=None,
                           max_squared_sum=None, sample_weight=None):
@@ -867,7 +862,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
         For the liblinear and lbfgs solvers set verbose to any positive
         number for verbosity.
 
-    solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga', 'auto'}
+    solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}
         Decides which solver to use.
 
     penalty : str, 'l1' or 'l2'
@@ -931,9 +926,8 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
     n_iter : array, shape(n_cs,)
         Actual number of iteration for each Cs.
     """
-    n_samples, n_features = X.shape
     solver, multi_class = _check_solver_option(
-        solver, multi_class, penalty, dual, 'lbfgs')
+        solver, multi_class, penalty, dual)
 
     X_train = X[train]
     X_test = X[test]
@@ -1075,8 +1069,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
         instance used by `np.random`. Used when ``solver`` == 'sag' or
         'liblinear'.
 
-    solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga', 'auto'},
-        default: 'liblinear'. Will be changed to 'auto' solver in 0.22.
+    solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'},
+        default: 'liblinear'. Will be changed to 'lbfgs' solver in 0.22.
         Algorithm to use in the optimization problem.
 
         - For small datasets, 'liblinear' is a good choice, whereas 'sag' and
@@ -1086,8 +1080,6 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
           schemes.
         - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas
           'liblinear' and 'saga' handle L1 penalty.
-        - 'auto' automatically chooses a solver based on the penalty
-          parameter.
 
         Note that 'sag' and 'saga' fast convergence is only guaranteed on
         features with approximately the same scale. You can
@@ -1097,8 +1089,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
            Stochastic Average Gradient descent solver.
         .. versionadded:: 0.19
            SAGA solver.
-        .. versionadded:: 0.20
-           'auto' solver.
+        .. versionchanged:: 0.20
+            Default will change from 'liblinear' to 'lbfgs' in 0.22.
 
     max_iter : int, default: 100
         Useful only for the newton-cg, sag and lbfgs solvers.
@@ -1114,6 +1106,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
 
         .. versionadded:: 0.18
            Stochastic Average Gradient descent solver for 'multinomial' case.
+        .. versionchanged:: 0.20
+            Default will change from 'ovr' to 'multinomial' in 0.22.
 
     verbose : int, default: 0
         For the liblinear and lbfgs solvers set verbose to any positive
@@ -1254,8 +1248,7 @@ def fit(self, X, y, sample_weight=None):
                              "positive; got (tol=%r)" % self.tol)
 
         solver, multi_class = _check_solver_option(
-            self.solver, self.multi_class, self.penalty, self.dual,
-            'liblinear')
+            self.solver, self.multi_class, self.penalty, self.dual)
 
         if solver in ['newton-cg']:
             _dtype = [np.float64, np.float32]
@@ -1282,7 +1275,7 @@ def fit(self, X, y, sample_weight=None):
             return self
 
         if solver in ['sag', 'saga']:
-            max_squared_sum = np.percentile(row_norms(X, squared=True), 90)
+            max_squared_sum = row_norms(X, squared=True).max()
         else:
             max_squared_sum = None
 
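For 'sag' and 'saga', `max_squared_sum` feeds the Lipschitz-constant estimate behind the solvers' step size, so an underestimate can make steps too large. A minimal sketch of why the 90th percentile is replaced by the exact maximum (the data here is synthetic, chosen to include one heavy row):

import numpy as np
from sklearn.utils.extmath import row_norms

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
X[0] *= 10  # a single large-norm row that a percentile can miss

sq_norms = row_norms(X, squared=True)
print(np.percentile(sq_norms, 90))  # old estimate: ignores the outlier row
print(sq_norms.max())               # new exact bound used after this commit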
@@ -1379,11 +1372,7 @@ def predict_proba(self, X):
         if not hasattr(self, "coef_"):
             raise NotFittedError("Call fit before prediction")
 
-        # This check can be removed in 0.22, changing back to self.multi_class
-        _, multi_class = _check_solver_option(
-            self.solver, self.multi_class, self.penalty, self.dual, 'lbfgs')
-
-        if multi_class == "ovr":
+        if self.multi_class == "ovr" or self.multi_class == "warn":
             return super(LogisticRegression, self)._predict_proba_lr(X)
         else:
             decision = self.decision_function(X)
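With the redundant solver check gone, predict_proba branches directly on `self.multi_class` (the 'warn' sentinel maps to the current 'ovr' default). A hedged sketch of the two probability paths it switches between, assuming a 2-D multiclass decision matrix; these helpers are illustrative stand-ins, not the library's internals:

import numpy as np

def ovr_proba(decision):
    # OvR path: per-class sigmoid, then renormalise across classes.
    prob = 1. / (1. + np.exp(-decision))
    return prob / prob.sum(axis=1, keepdims=True)

def multinomial_proba(decision):
    # Multinomial path: softmax over the decision values.
    shifted = decision - decision.max(axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=1, keepdims=True)

d = np.array([[2.0, 0.5, -1.0]])
print(ovr_proba(d))          # rows sum to 1 after renormalisation
print(multinomial_proba(d))  # softmax probabilities, rows sum to 1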
@@ -1476,8 +1465,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
         that can be used, look at :mod:`sklearn.metrics`. The
         default scoring option used is 'accuracy'.
 
-    solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga', 'auto'},
-        default: 'lbfgs'. Will be changed to 'auto' solver in 0.22.
+    solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'},
+        default: 'lbfgs'.
         Algorithm to use in the optimization problem.
 
         - For small datasets, 'liblinear' is a good choice, whereas 'sag' and
@@ -1489,8 +1478,6 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
           'liblinear' and 'saga' handle L1 penalty.
         - 'liblinear' might be slower in LogisticRegressionCV because it does
           not handle warm-starting.
-        - 'auto' automatically chooses a solver based on the penalty
-          parameter.
 
         Note that 'sag' and 'saga' fast convergence is only guaranteed on
         features with approximately the same scale. You can preprocess the data
@@ -1500,8 +1487,6 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
            Stochastic Average Gradient descent solver.
         .. versionadded:: 0.19
            SAGA solver.
-        .. versionadded:: 0.20
-           'auto' solver.
 
     tol : float, optional
         Tolerance for stopping criteria.
@@ -1561,6 +1546,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
 
         .. versionadded:: 0.18
            Stochastic Average Gradient descent solver for 'multinomial' case.
+        .. versionchanged:: 0.20
+            Default will change from 'ovr' to 'multinomial' in 0.22.
 
     random_state : int, RandomState instance or None, optional, default None
         If int, random_state is the seed used by the random number generator;
@@ -1621,7 +1608,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
 
     """
     def __init__(self, Cs=10, fit_intercept=True, cv='warn', dual=False,
-                 penalty='l2', scoring=None, solver='warn', tol=1e-4,
+                 penalty='l2', scoring=None, solver='lbfgs', tol=1e-4,
                  max_iter=100, class_weight=None, n_jobs=1, verbose=0,
                  refit=True, intercept_scaling=1., multi_class='warn',
                  random_state=None):
@@ -1663,7 +1650,7 @@ def fit(self, X, y, sample_weight=None):
         self : object
         """
         solver, multi_class = _check_solver_option(
-            self.solver, self.multi_class, self.penalty, self.dual, 'lbfgs')
+            self.solver, self.multi_class, self.penalty, self.dual)
 
         if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:
             raise ValueError("Maximum number of iteration must be positive;"
@@ -1691,7 +1678,7 @@ def fit(self, X, y, sample_weight=None):
         encoded_labels = label_encoder.transform(label_encoder.classes_)
 
         if solver in ['sag', 'saga']:
-            max_squared_sum = np.percentile(row_norms(X, squared=True), 90)
+            max_squared_sum = row_norms(X, squared=True).max()
         else:
             max_squared_sum = None
 