MAINT Clean up deprecated "log" loss of SGDClassifier for 1.3 by jeremiedbb · Pull Request #25865 · scikit-learn/scikit-learn
MAINT Clean up deprecated "log" loss of SGDClassifier for 1.3 #25865


Merged
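For readers arriving from the deprecation warning, the migration is a one-token change. A minimal sketch; only `SGDClassifier` and the two loss spellings are taken from the diff below:

```python
from sklearn.linear_model import SGDClassifier

# Deprecated since 1.1 and removed by this PR for 1.3; previously this
# emitted a FutureWarning pointing at the equivalent spelling:
# clf = SGDClassifier(loss="log")

# The supported, equivalent spelling:
clf = SGDClassifier(loss="log_loss")
```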
26 changes: 5 additions & 21 deletions sklearn/linear_model/_stochastic_gradient.py
@@ -158,14 +158,6 @@ def _more_validate_params(self, for_partial_fit=False):
        self._get_penalty_type(self.penalty)
        self._get_learning_rate_type(self.learning_rate)

-        # TODO(1.3): remove "log"
-        if self.loss == "log":
-            warnings.warn(
-                "The loss 'log' was deprecated in v1.1 and will be removed in version "
-                "1.3. Use `loss='log_loss'` which is equivalent.",
-                FutureWarning,
-            )
-
    def _get_loss_function(self, loss):
        """Get concrete ``LossFunction`` object for str ``loss``."""
        loss_ = self.loss_functions[loss]
@@ -501,13 +493,11 @@ def _get_plain_sgd_function(input_dtype):

class BaseSGDClassifier(LinearClassifierMixin, BaseSGD, metaclass=ABCMeta):

-    # TODO(1.3): Remove "log"
    loss_functions = {
        "hinge": (Hinge, 1.0),
        "squared_hinge": (SquaredHinge, 1.0),
        "perceptron": (Hinge, 0.0),
        "log_loss": (Log,),
-        "log": (Log,),
        "modified_huber": (ModifiedHuber,),
        "squared_error": (SquaredLoss,),
        "huber": (Huber, DEFAULT_EPSILON),
@@ -517,7 +507,7 @@ class BaseSGDClassifier(LinearClassifierMixin, BaseSGD, metaclass=ABCMeta):

    _parameter_constraints: dict = {
        **BaseSGD._parameter_constraints,
-        "loss": [StrOptions(set(loss_functions), deprecated={"log"})],
+        "loss": [StrOptions(set(loss_functions))],
        "early_stopping": ["boolean"],
        "validation_fraction": [Interval(Real, 0, 1, closed="neither")],
        "n_iter_no_change": [Interval(Integral, 1, None, closed="left")],
@@ -950,15 +940,15 @@ class SGDClassifier(BaseSGDClassifier):

    Parameters
    ----------
-    loss : {'hinge', 'log_loss', 'log', 'modified_huber', 'squared_hinge',\
+    loss : {'hinge', 'log_loss', 'modified_huber', 'squared_hinge',\
            'perceptron', 'squared_error', 'huber', 'epsilon_insensitive',\
            'squared_epsilon_insensitive'}, default='hinge'
        The loss function to be used.

        - 'hinge' gives a linear SVM.
        - 'log_loss' gives logistic regression, a probabilistic classifier.
        - 'modified_huber' is another smooth loss that brings tolerance to
-        outliers as well as probability estimates.
+          outliers as well as probability estimates.
        - 'squared_hinge' is like hinge but is quadratically penalized.
        - 'perceptron' is the linear loss used by the perceptron algorithm.
        - The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and
@@ -969,10 +959,6 @@ class SGDClassifier(BaseSGDClassifier):
        More details about the losses formulas can be found in the
        :ref:`User Guide <sgd_mathematical_formulation>`.

-        .. deprecated:: 1.1
-            The loss 'log' was deprecated in v1.1 and will be removed
-            in version 1.3. Use `loss='log_loss'` which is equivalent.
-
    penalty : {'l2', 'l1', 'elasticnet', None}, default='l2'
        The penalty (aka regularization term) to be used. Defaults to 'l2'
        which is the standard regularizer for linear SVM models. 'l1' and
@@ -1249,8 +1235,7 @@ def __init__(
        )

    def _check_proba(self):
-        # TODO(1.3): Remove "log"
-        if self.loss not in ("log_loss", "log", "modified_huber"):
+        if self.loss not in ("log_loss", "modified_huber"):
            raise AttributeError(
                "probability estimates are not available for loss=%r" % self.loss
            )
@@ -1295,8 +1280,7 @@ def predict_proba(self, X):
"""
check_is_fitted(self)

# TODO(1.3): Remove "log"
if self.loss in ("log_loss", "log"):
if self.loss == "log_loss":
return self._predict_proba_lr(X)

elif self.loss == "modified_huber":
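As the `_check_proba` and `predict_proba` hunks above show, probability estimates remain available only for the `'log_loss'` and `'modified_huber'` losses. A small sketch of the resulting behavior; the toy dataset is made up for illustration:

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(random_state=0)  # toy data for illustration

# 'log_loss' trains a probabilistic classifier, so predict_proba works.
clf = SGDClassifier(loss="log_loss", random_state=0).fit(X, y)
print(clf.predict_proba(X[:2]))

# Any other loss, e.g. 'hinge', refuses probability estimates, matching
# the AttributeError raised in _check_proba above.
svm = SGDClassifier(loss="hinge", random_state=0).fit(X, y)
try:
    svm.predict_proba(X[:2])
except AttributeError as exc:
    print(exc)
```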
26 changes: 1 addition & 25 deletions sklearn/linear_model/tests/test_sgd.py
@@ -716,8 +716,7 @@ def test_sgd_predict_proba_method_access(klass):
    # details.
    for loss in linear_model.SGDClassifier.loss_functions:
        clf = SGDClassifier(loss=loss)
-        # TODO(1.3): Remove "log"
-        if loss in ("log_loss", "log", "modified_huber"):
+        if loss in ("log_loss", "modified_huber"):
            assert hasattr(clf, "predict_proba")
            assert hasattr(clf, "predict_log_proba")
        else:
@@ -2060,29 +2059,6 @@ def test_SGDClassifier_fit_for_all_backends(backend):
    assert_array_almost_equal(clf_sequential.coef_, clf_parallel.coef_)


-# TODO(1.3): Remove
-@pytest.mark.parametrize(
-    "old_loss, new_loss, Estimator",
-    [
-        ("log", "log_loss", linear_model.SGDClassifier),
-    ],
-)
-def test_loss_deprecated(old_loss, new_loss, Estimator):
-
-    # Note: class BaseSGD calls self._validate_params() in __init__, therefore
-    # even instantiation of class raises FutureWarning for deprecated losses.
-    with pytest.warns(FutureWarning, match=f"The loss '{old_loss}' was deprecated"):
-        est1 = Estimator(loss=old_loss, random_state=0)
-        est1.fit(X, Y)
-
-    est2 = Estimator(loss=new_loss, random_state=0)
-    est2.fit(X, Y)
-    if hasattr(est1, "predict_proba"):
-        assert_allclose(est1.predict_proba(X), est2.predict_proba(X))
-    else:
-        assert_allclose(est1.predict(X), est2.predict(X))
-
-
@pytest.mark.parametrize(
    "Estimator", [linear_model.SGDClassifier, linear_model.SGDRegressor]
)
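With the deprecation path and its dedicated test removed, `loss="log"` is simply rejected by the common parameter-validation machinery (`StrOptions` above, now without the `deprecated={"log"}` escape hatch). A sketch of the post-1.3 failure mode, assuming scikit-learn's usual validate-at-fit convention; the toy data is for illustration:

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(random_state=0)  # toy data for illustration

clf = SGDClassifier(loss="log")  # constructing still succeeds ...
try:
    clf.fit(X, y)  # ... but parameter validation at fit time rejects "log"
except ValueError as exc:
    # InvalidParameterError subclasses ValueError; its message lists the
    # accepted loss options, which no longer include "log".
    print(exc)
```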